00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
/* Defined ahead of the #include of get_bits.h so that header sees it.
 * NOTE(review): presumably selects the bit reader variant without
 * per-read bounds checks -- confirm against get_bits.h. */
00028 #define UNCHECKED_BITSTREAM_READER 1
00029
00030 #include <math.h>
00031 #include "avcodec.h"
00032 #include "get_bits.h"
00033 #include "put_bits.h"
00034 #include "wmavoice_data.h"
00035 #include "celp_math.h"
00036 #include "celp_filters.h"
00037 #include "acelp_vectors.h"
00038 #include "acelp_filters.h"
00039 #include "lsp.h"
00040 #include "libavutil/lzo.h"
00041 #include "dct.h"
00042 #include "rdft.h"
00043 #include "sinewin.h"
00044
/* Upper bound on blocks per frame (largest n_blocks in frame_descs[]);
 * sizes the per-block pitch array in synth_frame(). */
00045 #define MAX_BLOCKS 8
/* Largest filter order; WMAVoiceContext.lsps is set to 10 or 16. */
00046 #define MAX_LSPS 16
/* Number of floats reserved in front of synth_filter_out_buf for filter
 * history (see postfilter()). */
00047 #define MAX_LSPS_ALIGN16 16
00048
/* Factor used only to derive MAX_SFRAMESIZE below. */
00049 #define MAX_FRAMES 3
/* Samples per frame; a block is MAX_FRAMESIZE / n_blocks samples long. */
00050 #define MAX_FRAMESIZE 160
/* Capacity of the excitation history; history_nsamples must not exceed it. */
00051 #define MAX_SIGNAL_HISTORY 416
00052 #define MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES)
/* Byte size of WMAVoiceContext.sframe_cache (before input padding). */
00054 #define SFRAME_CACHE_MAXSIZE 256
00055
/* Table bit width handed to INIT_VLC_STATIC() for frame_type_vlc. */
00056 #define VLC_NBITS 6
00057
00058
/* Frame type VLC; built in decode_vbmtree() and read in synth_frame().
 * The decoded code is mapped through WMAVoiceContext.vbm_tree to an index
 * into frame_descs[]. */
00061 static VLC frame_type_vlc;
00062
/* Adaptive codebook (ACB) types, stored in frame_type_desc.acb_type. */
00066 enum {
/* No adaptive codebook: synth_block() takes the synth_block_hardcoded()
 * path. */
00067 ACB_TYPE_NONE = 0,
/* Pitch is re-derived per run of samples from last_pitch_val and
 * pitch_diff_sh16, interpolating with wmavoice_ipol1_coeffs
 * (see synth_block_fcb_acb()). */
00068 ACB_TYPE_ASYMMETRIC = 1,
00069
00070
00071
00072
/* One pitch per block given in quarter-sample units (block_pitch_sh2),
 * interpolating with wmavoice_ipol2_coeffs or copying when the fraction
 * is zero (see synth_block_fcb_acb()). */
00073 ACB_TYPE_HAMMING = 2
00074
00075
00076 };
00077
/* Fixed codebook (FCB) types, stored in frame_type_desc.fcb_type. The
 * numeric order matters: postfilter() tests fcb_type >= FCB_TYPE_AW_PULSES. */
00081 enum {
/* Excitation is read from wmavoice_std_codebook at an offset chosen by
 * pRNG(), scaled by silence_gain; no bits are read for it. */
00082 FCB_TYPE_SILENCE = 0,
00083
00084
/* Excitation is read from wmavoice_std_codebook at an offset and with a
 * gain index that are both read from the bitstream. */
00085 FCB_TYPE_HARDCODED = 1,
00086
/* Pulses are produced by aw_pulse_set1() and aw_pulse_set2(). */
00087 FCB_TYPE_AW_PULSES = 2,
00088
/* Five pulse tracks read directly from the bitstream; the first
 * dbl_pulses tracks carry a second pulse. */
00089 FCB_TYPE_EXC_PULSES = 3,
00090
00091
00092 };
00093
/* Static description of each of the 17 frame types. The index into
 * frame_descs[] is the value stored in WMAVoiceContext.vbm_tree. */
00097 static const struct frame_type_desc {
/* Number of blocks the frame is split into (1, 2, 4 or 8 below). */
00098 uint8_t n_blocks;
00099
/* log2(n_blocks), kept separately so it can be used as a shift count. */
00100 uint8_t log_n_blocks;
/* One of the ACB_TYPE_* values. */
00101 uint8_t acb_type;
/* One of the FCB_TYPE_* values. */
00102 uint8_t fcb_type;
/* For FCB_TYPE_EXC_PULSES: pulse tracks with index < dbl_pulses read a
 * second position (see synth_block_fcb_acb()). */
00103 uint8_t dbl_pulses;
00104
00105
/* NOTE(review): not read in the visible part of the file; presumably the
 * coded size of the frame in bits -- confirm against the users. */
00106 uint16_t frame_size;
00107
00108 } frame_descs[17] = {
/* columns: n_blocks, log_n_blocks, acb_type, fcb_type, dbl_pulses, frame_size */
00109 { 1, 0, ACB_TYPE_NONE, FCB_TYPE_SILENCE, 0, 0 },
00110 { 2, 1, ACB_TYPE_NONE, FCB_TYPE_HARDCODED, 0, 28 },
00111 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES, 0, 46 },
00112 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 80 },
00113 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 104 },
00114 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0, 108 },
00115 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 132 },
00116 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 168 },
00117 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 64 },
00118 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 80 },
00119 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 104 },
00120 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 108 },
00121 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 132 },
00122 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 168 },
00123 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 176 },
00124 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 208 },
00125 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 256 }
00126 };
00127
/* Private decoder state (AVCodecContext.priv_data). The first group of
 * fields is derived once from the extradata and sample rate in
 * wmavoice_decode_init(); the remaining fields are working state that the
 * synthesis and postfilter functions update while decoding. */
00131 typedef struct {
/* Exposed to the caller as ctx->coded_frame by wmavoice_decode_init(). */
00136 AVFrame frame;
/* Bit reader; wmavoice_decode_init() uses it to parse the VBM tree. */
00137 GetBitContext gb;
00138
00139
00140
/* Maps a frame_type_vlc code to a frame_descs[] index; entries left at
 * -1 (0xff) by decode_vbmtree() mark invalid codes. */
00141 int8_t vbm_tree[25];
00142
/* Set to 3 + av_ceil_log2(block_align) at init. */
00143 int spillover_bitsize;
00144
00145
/* max_pitch_val + 8; checked against MAX_SIGNAL_HISTORY at init. */
00146 int history_nsamples;
00147
00148
00149
/* Extradata flag bit 0; when set, init creates the RDFT/DCT contexts and
 * the sin/cos tables below. */
00150 int do_apf;
00151
/* Extradata flag bits 2..5 (0..11 accepted); first index into
 * wmavoice_denoise_power_table in calc_input_response(). */
00152 int denoise_strength;
00153
/* Extradata flag bit 6; enables the tilt compensation step at the end of
 * calc_input_response(). */
00154 int denoise_tilt_corr;
00155
/* Extradata flag bits 7..10; postfilter() applies its order-2 transfer
 * function only when this is > 8. */
00156 int dc_level;
00157
00158
/* Number of LSPs / filter order: 16 if extradata flag 0x1000 is set,
 * otherwise 10. */
00159 int lsps;
/* Extradata flag 0x2000; passed as q_mode to select the interpolation
 * coefficient table in dequant_lsp10r()/dequant_lsp16r(). */
00160 int lsp_q_mode;
/* Extradata flag 0x4000. NOTE(review): not read in the visible part. */
00161 int lsp_def_mode;
00162
/* 34 (16 LSPs) or 24 (10 LSPs); matches the bits read by
 * dequant_lsp16i()/dequant_lsp10i(). */
00163 int frame_lsp_bitsize;
00164
/* 60 (16 LSPs) or 48 (10 LSPs); matches the bits read by
 * dequant_lsp16r()/dequant_lsp10r(). */
00165 int sframe_lsp_bitsize;
00166
00167
/* Pitch limits derived from the sample rate at init; pitch_nbits is
 * av_ceil_log2(max_pitch_val - min_pitch_val). */
00168 int min_pitch_val;
00169 int max_pitch_val;
00170 int pitch_nbits;
00171
/* av_ceil_log2(block_pitch_range). */
00172 int block_pitch_nbits;
00173
/* Derived from block_conv_table[] and min_pitch_val at init. */
00174 int block_pitch_range;
/* 1 + av_ceil_log2(block_delta_pitch_hrange). */
00175 int block_delta_pitch_nbits;
00176
00177
00178
/* (pitch_range >> 3) & ~0xF; init fails if this is <= 0. */
00179 int block_delta_pitch_hrange;
00180
/* [0] = min_pitch_val, [1] = pitch_range * 25 / 64,
 * [2] = pitch_range * 44 / 64, [3] = max_pitch_val - 1. */
00181 uint16_t block_conv_table[4];
00182
00183
/* NOTE(review): the next six fields are not referenced in the visible part
 * of the file; presumably packet/superframe bookkeeping -- confirm against
 * the packet parsing code. */
00193 int spillover_nbits;
00194
00195
00196
00197 int has_residual_lsps;
00198
00199
00200
00201
00202 int skip_bits_next;
00203
00204
00205
00206 uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + FF_INPUT_BUFFER_PADDING_SIZE];
00209 int sframe_cache_size;
00210
00211
00212
00213
00214 PutBitContext pb;
00215
/* Initialised at init to pi * (n + 1) / (lsps + 1). */
00225 double prev_lsps[MAX_LSPS];
00226
/* Initialised to 40; base pitch for the ACB_TYPE_ASYMMETRIC interpolation
 * in synth_block_fcb_acb(). */
00227 int last_pitch_val;
/* Initialised to ACB_TYPE_NONE. */
00228 int last_acb_type;
/* Per-sample pitch change in 16.16 fixed point, as used by
 * synth_block_fcb_acb(). */
00229 int pitch_diff_sh16;
00230
/* Gain applied to the codebook for FCB_TYPE_SILENCE blocks. */
00231 float silence_gain;
00232
/* Set by aw_parse_coords() when the 6-bit position code was >= 54 and two
 * extra bits were read; shortens the first block's read in
 * aw_pulse_set1() by 2 bits. */
00233 int aw_idx_is_ext;
00234
/* 24 if both block pitches exceed 32, else 16 (aw_parse_coords()). */
00235 int aw_pulse_range;
00236
00237
00238
00239
00240
/* Pulse count per block, computed in aw_parse_coords(). */
00241 int aw_n_pulses[2];
00242
00243
/* Offset of the first pulse range per block, relative to the start of
 * that block (may be negative). */
00244 int aw_first_pulse_off[2];
00245
/* Written at the end of aw_pulse_set2() and read by it for block 1. */
00246 int aw_next_pulse_off_cache;
00247
00248
00249
00250
00251
/* Input to pRNG() for FCB_TYPE_SILENCE blocks. */
00252 int frame_cntr;
00253
/* History of clipped gain values; cleared by synth_block_hardcoded() and
 * shifted/updated by synth_block_fcb_acb(). */
00254 float gain_pred_err[6];
00255 float excitation_history[MAX_SIGNAL_HISTORY];
00259 float synth_history[MAX_LSPS];
00260
/* Transform contexts; initialised only when do_apf is set. */
00269 RDFTContext rdft, irdft;
00270
00271 DCTContext dct, dst;
00272
/* 511-entry tables built at init from a 256-point sine window;
 * calc_input_response() indexes them with 255 + a value clipped to
 * [-255, 255]. */
00273 float sin[511], cos[511];
00274
/* Gain memory carried between adaptive_gain_control() calls. */
00275 float postfilter_agc;
00276
/* Filter memory for the order-2 transfer function in postfilter(). */
00277 float dcf_mem[2];
00278 float zero_exc_pf[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE];
/* Filter output beyond the current block, kept by wiener_denoise() to be
 * added to the following block(s); the size field is the number of valid
 * entries. */
00281 float denoise_filter_cache[MAX_FRAMESIZE];
00282 int denoise_filter_cache_size;
/* 128-float aligned scratch buffers used by wiener_denoise(). */
00283 DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x80];
00285 DECLARE_ALIGNED(32, float, denoise_coeffs_pf)[0x80];
/* postfilter() works at offset MAX_LSPS_ALIGN16 so that the preceding
 * entries can hold the last `lsps` outputs of the previous block. */
00287 DECLARE_ALIGNED(32, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16];
00290
00293 } WMAVoiceContext;
00294
00304 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
00305 {
00306 static const uint8_t bits[] = {
00307 2, 2, 2, 4, 4, 4,
00308 6, 6, 6, 8, 8, 8,
00309 10, 10, 10, 12, 12, 12,
00310 14, 14, 14, 14
00311 };
00312 static const uint16_t codes[] = {
00313 0x0000, 0x0001, 0x0002,
00314 0x000c, 0x000d, 0x000e,
00315 0x003c, 0x003d, 0x003e,
00316 0x00fc, 0x00fd, 0x00fe,
00317 0x03fc, 0x03fd, 0x03fe,
00318 0x0ffc, 0x0ffd, 0x0ffe,
00319 0x3ffc, 0x3ffd, 0x3ffe, 0x3fff
00320 };
00321 int cntr[8] = { 0 }, n, res;
00322
00323 memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25);
00324 for (n = 0; n < 17; n++) {
00325 res = get_bits(gb, 3);
00326 if (cntr[res] > 3)
00327 return -1;
00328 vbm_tree[res * 3 + cntr[res]++] = n;
00329 }
00330 INIT_VLC_STATIC(&frame_type_vlc, VLC_NBITS, sizeof(bits),
00331 bits, 1, 1, codes, 2, 2, 132);
00332 return 0;
00333 }
00334
00338 static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
00339 {
00340 int n, flags, pitch_range, lsp16_flag;
00341 WMAVoiceContext *s = ctx->priv_data;
00342
00351 if (ctx->extradata_size != 46) {
00352 av_log(ctx, AV_LOG_ERROR,
00353 "Invalid extradata size %d (should be 46)\n",
00354 ctx->extradata_size);
00355 return -1;
00356 }
00357 flags = AV_RL32(ctx->extradata + 18);
00358 s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
00359 s->do_apf = flags & 0x1;
00360 if (s->do_apf) {
00361 ff_rdft_init(&s->rdft, 7, DFT_R2C);
00362 ff_rdft_init(&s->irdft, 7, IDFT_C2R);
00363 ff_dct_init(&s->dct, 6, DCT_I);
00364 ff_dct_init(&s->dst, 6, DST_I);
00365
00366 ff_sine_window_init(s->cos, 256);
00367 memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
00368 for (n = 0; n < 255; n++) {
00369 s->sin[n] = -s->sin[510 - n];
00370 s->cos[510 - n] = s->cos[n];
00371 }
00372 }
00373 s->denoise_strength = (flags >> 2) & 0xF;
00374 if (s->denoise_strength >= 12) {
00375 av_log(ctx, AV_LOG_ERROR,
00376 "Invalid denoise filter strength %d (max=11)\n",
00377 s->denoise_strength);
00378 return -1;
00379 }
00380 s->denoise_tilt_corr = !!(flags & 0x40);
00381 s->dc_level = (flags >> 7) & 0xF;
00382 s->lsp_q_mode = !!(flags & 0x2000);
00383 s->lsp_def_mode = !!(flags & 0x4000);
00384 lsp16_flag = flags & 0x1000;
00385 if (lsp16_flag) {
00386 s->lsps = 16;
00387 s->frame_lsp_bitsize = 34;
00388 s->sframe_lsp_bitsize = 60;
00389 } else {
00390 s->lsps = 10;
00391 s->frame_lsp_bitsize = 24;
00392 s->sframe_lsp_bitsize = 48;
00393 }
00394 for (n = 0; n < s->lsps; n++)
00395 s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
00396
00397 init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
00398 if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
00399 av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
00400 return -1;
00401 }
00402
00403 s->min_pitch_val = ((ctx->sample_rate << 8) / 400 + 50) >> 8;
00404 s->max_pitch_val = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
00405 pitch_range = s->max_pitch_val - s->min_pitch_val;
00406 if (pitch_range <= 0) {
00407 av_log(ctx, AV_LOG_ERROR, "Invalid pitch range; broken extradata?\n");
00408 return -1;
00409 }
00410 s->pitch_nbits = av_ceil_log2(pitch_range);
00411 s->last_pitch_val = 40;
00412 s->last_acb_type = ACB_TYPE_NONE;
00413 s->history_nsamples = s->max_pitch_val + 8;
00414
00415 if (s->min_pitch_val < 1 || s->history_nsamples > MAX_SIGNAL_HISTORY) {
00416 int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
00417 max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
00418
00419 av_log(ctx, AV_LOG_ERROR,
00420 "Unsupported samplerate %d (min=%d, max=%d)\n",
00421 ctx->sample_rate, min_sr, max_sr);
00422
00423 return -1;
00424 }
00425
00426 s->block_conv_table[0] = s->min_pitch_val;
00427 s->block_conv_table[1] = (pitch_range * 25) >> 6;
00428 s->block_conv_table[2] = (pitch_range * 44) >> 6;
00429 s->block_conv_table[3] = s->max_pitch_val - 1;
00430 s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
00431 if (s->block_delta_pitch_hrange <= 0) {
00432 av_log(ctx, AV_LOG_ERROR, "Invalid delta pitch hrange; broken extradata?\n");
00433 return -1;
00434 }
00435 s->block_delta_pitch_nbits = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
00436 s->block_pitch_range = s->block_conv_table[2] +
00437 s->block_conv_table[3] + 1 +
00438 2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
00439 s->block_pitch_nbits = av_ceil_log2(s->block_pitch_range);
00440
00441 ctx->sample_fmt = AV_SAMPLE_FMT_FLT;
00442
00443 avcodec_get_frame_defaults(&s->frame);
00444 ctx->coded_frame = &s->frame;
00445
00446 return 0;
00447 }
00448
/**
 * Adaptive gain control: scale a filtered signal so that its level follows
 * that of a reference signal.
 *
 * The "energy" of each signal is the sum of absolute sample values. The
 * per-sample gain is a one-pole smoothed version of
 * (1 - alpha) * reference_energy / filtered_energy, carried across calls
 * through *gain_mem.
 *
 * When the filtered signal is entirely zero, the energy ratio is undefined
 * (0/0 or x/0). In that case the target gain is taken as 0, so the memory
 * simply decays by alpha per sample. Without this guard a NaN or infinity
 * would be written to *gain_mem and would corrupt every later call.
 *
 * @param out          output samples (size entries); may be the same as in
 * @param in           filtered input samples
 * @param speech_synth reference (unfiltered) samples
 * @param size         number of samples in each buffer
 * @param alpha        smoothing factor of the gain recursion
 * @param gain_mem     gain state, read on entry and updated on exit
 */
static void adaptive_gain_control(float *out, const float *in,
                                  const float *speech_synth,
                                  int size, float alpha, float *gain_mem)
{
    int i;
    float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
    float mem = *gain_mem;

    for (i = 0; i < size; i++) {
        speech_energy     += fabsf(speech_synth[i]);
        postfilter_energy += fabsf(in[i]);
    }

    /* avoid dividing by zero on all-zero input */
    gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :
                        (1.0 - alpha) * speech_energy / postfilter_energy;

    for (i = 0; i < size; i++) {
        mem    = alpha * mem + gain_scale_factor;
        out[i] = in[i] * mem;
    }

    *gain_mem = mem;
}
00491
00510 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
00511 const float *in, float *out, int size)
00512 {
00513 int n;
00514 float optimal_gain = 0, dot;
00515 const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
00516 *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
00517 *best_hist_ptr;
00518
00519
00520 do {
00521 dot = ff_dot_productf(in, ptr, size);
00522 if (dot > optimal_gain) {
00523 optimal_gain = dot;
00524 best_hist_ptr = ptr;
00525 }
00526 } while (--ptr >= end);
00527
00528 if (optimal_gain <= 0)
00529 return -1;
00530 dot = ff_dot_productf(best_hist_ptr, best_hist_ptr, size);
00531 if (dot <= 0)
00532 return -1;
00533
00534 if (optimal_gain <= dot) {
00535 dot = dot / (dot + 0.6 * optimal_gain);
00536 } else
00537 dot = 0.625;
00538
00539
00540 for (n = 0; n < size; n++)
00541 out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
00542
00543 return 0;
00544 }
00545
/**
 * Compute the ratio of the lag-1 to the lag-0 autocorrelation of the
 * sequence { 1, lpcs[0], ..., lpcs[n_lpcs - 1] }.
 *
 * @param lpcs   coefficients (without the implicit leading 1)
 * @param n_lpcs number of coefficients
 * @return rh1 / rh0
 */
static float tilt_factor(const float *lpcs, int n_lpcs)
{
    float energy, lag1;

    /* lag 0: 1*1 + sum lpcs[i]^2 */
    energy = 1.0 + ff_dot_productf(lpcs, lpcs, n_lpcs);
    /* lag 1: 1*lpcs[0] + sum lpcs[i]*lpcs[i+1] */
    lag1   = lpcs[0] + ff_dot_productf(lpcs, lpcs + 1, n_lpcs - 1);

    return lag1 / energy;
}
00565
/*
 * Derive the denoising filter coefficients from the LPC array.
 *
 * s         - decoder context; uses the rdft/irdft/dct/dst contexts, the
 *             sin/cos tables, denoise_strength and denoise_tilt_corr
 * lpcs      - 128-entry input buffer; it is transformed in place and used
 *             as scratch, so its contents are destroyed
 * fcb_type  - FCB_TYPE_*; only selects one of two gain scale constants
 * coeffs    - 128-entry output buffer
 * remainder - number of leading output coefficients to keep; everything
 *             from coeffs[remainder] on is zeroed
 */
00569 static void calc_input_response(WMAVoiceContext *s, float *lpcs,
00570 int fcb_type, float *coeffs, int remainder)
00571 {
00572 float last_coeff, min = 15.0, max = -15.0;
00573 float irange, angle_mul, gain_mul, range, sq;
00574 int n, idx;
00575
00576
/* Forward transform of lpcs, then replace each bin by log10 of its squared
 * magnitude while tracking the smallest and largest value seen. The bin
 * stored at lpcs[1] is handled first and parked in last_coeff because the
 * loop overwrites lpcs[1] at n = 1; each iteration only overwrites index n
 * after reading indices 2n and 2n + 1, so no unread input is lost.
 * NOTE(review): lpcs[0]/lpcs[1] are treated as purely real bins --
 * presumably DC and Nyquist in the RDFT packing; confirm against rdft.h. */
00577 s->rdft.rdft_calc(&s->rdft, lpcs);
00578 #define log_range(var, assign) do { \
00579 float tmp = log10f(assign); var = tmp; \
00580 max = FFMAX(max, tmp); min = FFMIN(min, tmp); \
00581 } while (0)
00582 log_range(last_coeff, lpcs[1] * lpcs[1]);
00583 for (n = 1; n < 64; n++)
00584 log_range(lpcs[n], lpcs[n * 2] * lpcs[n * 2] +
00585 lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
00586 log_range(lpcs[0], lpcs[0] * lpcs[0]);
00587 #undef log_range
00588 range = max - min;
00589 lpcs[64] = last_coeff;
00590
00591
00592
00593
00594
00595
/* irange scales (max - value) into table index units; the two gain
 * constants differ only by fcb_type. */
00596 irange = 64.0 / range;
00597 gain_mul = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
00598 (5.0 / 14.7));
00599 angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
/* For each of the 65 bins: look up a power value by distance from the
 * maximum, store a scaled copy back into lpcs[] and a magnitude into
 * coeffs[]. */
00600 for (n = 0; n <= 64; n++) {
00601 float pwr;
00602
00603 idx = FFMAX(0, lrint((max - lpcs[n]) * irange) - 1);
00604 pwr = wmavoice_denoise_power_table[s->denoise_strength][idx];
00605 lpcs[n] = angle_mul * pwr;
00606
00607
/* Magnitude lookup; indices above 127 are extrapolated geometrically from
 * the last table entry, negative indices use entry 0. */
00608 idx = (pwr * gain_mul - 0.0295) * 70.570526123;
00609 if (idx > 127) {
00610 coeffs[n] = wmavoice_energy_table[127] *
00611 powf(1.0331663, idx - 127);
00612 } else
00613 coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
00614 }
00615
00616
00617
00618
00619
/* Both transforms run in place on lpcs[], one after the other. */
00620 s->dct.dct_calc(&s->dct, lpcs);
00621 s->dst.dct_calc(&s->dst, lpcs);
00622
00623
/* Expand the 65 magnitudes in coeffs[0..64] into interleaved pairs
 * (coeffs[2n], coeffs[2n + 1]) by multiplying with cos/sin table entries
 * selected from lpcs[]; table indices are clipped to [-255, 255] and
 * offset by 255. The loop runs n downwards so that coeffs[n] is read
 * before indices 2n and 2n + 1 are written; n is decremented twice per
 * pass: odd n use -lpcs[64], even n use +lpcs[64]. The value for
 * coeffs[1] is parked in last_coeff and stored after the loop. */
00624 idx = 255 + av_clip(lpcs[64], -255, 255);
00625 coeffs[0] = coeffs[0] * s->cos[idx];
00626 idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
00627 last_coeff = coeffs[64] * s->cos[idx];
00628 for (n = 63;; n--) {
00629 idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00630 coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00631 coeffs[n * 2] = coeffs[n] * s->cos[idx];
00632
00633 if (!--n) break;
00634
00635 idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00636 coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00637 coeffs[n * 2] = coeffs[n] * s->cos[idx];
00638 }
00639 coeffs[1] = last_coeff;
00640
00641
/* Inverse transform of the pairs built above. */
00642 s->irdft.rdft_calc(&s->irdft, coeffs);
00643
00644
/* Keep only the first `remainder` coefficients, optionally apply tilt
 * compensation (which also forces the last kept coefficient to 0), then
 * scale by (1/64) / sqrt(sum of squares). */
00645 memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128 - remainder));
00646 if (s->denoise_tilt_corr) {
00647 float tilt_mem = 0;
00648
00649 coeffs[remainder - 1] = 0;
00650 ff_tilt_compensation(&tilt_mem,
00651 -1.8 * tilt_factor(coeffs, remainder - 1),
00652 coeffs, remainder);
00653 }
00654 sq = (1.0 / 64.0) * sqrtf(1 / ff_dot_productf(coeffs, coeffs, remainder));
00655 for (n = 0; n < remainder; n++)
00656 coeffs[n] *= sq;
00657 }
00658
/*
 * Filter one block in the frequency domain and merge the filter tails of
 * neighbouring blocks.
 *
 * s        - decoder context (scratch buffers, transforms, tail cache)
 * fcb_type - FCB_TYPE_*; for FCB_TYPE_SILENCE no filtering is done and
 *            only the cached tail of earlier blocks is added
 * synth_pf - in/out buffer with room for 128 floats; the first `size`
 *            entries are the block, the rest is overwritten
 * size     - block length in samples
 * lpcs     - s->lsps filter coefficients for this block
 */
00685 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
00686 float *synth_pf, int size,
00687 const float *lpcs)
00688 {
/* `remainder` is only assigned and only read when
 * fcb_type != FCB_TYPE_SILENCE. */
00689 int remainder, lim, n;
00690
00691 if (fcb_type != FCB_TYPE_SILENCE) {
00692 float *tilted_lpcs = s->tilted_lpcs_pf,
00693 *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
00694
/* Build { 1, lpcs[0..lsps-1], 0, ... } padded to 128 entries and apply
 * tilt compensation to its first lsps + 2 entries. */
00695 tilted_lpcs[0] = 1.0;
00696 memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
00697 memset(&tilted_lpcs[s->lsps + 1], 0,
00698 sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
00699 ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
00700 tilted_lpcs, s->lsps + 2);
00701
00702
00703
00704
00705
/* Length of the filter (and of the tail kept beyond the block): limited
 * both by the room left in the 128-point transform and by the block
 * size. */
00706 remainder = FFMIN(127 - size, size - 1);
00707 calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
00708
00709
00710
/* Zero-pad the block to 128 samples, transform block and filter, and
 * multiply them bin by bin: indices 0 and 1 as plain reals, the rest as
 * complex pairs (re, im). Then transform back. */
00711 memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
00712 s->rdft.rdft_calc(&s->rdft, synth_pf);
00713 s->rdft.rdft_calc(&s->rdft, coeffs);
00714 synth_pf[0] *= coeffs[0];
00715 synth_pf[1] *= coeffs[1];
00716 for (n = 1; n < 64; n++) {
00717 float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
00718 synth_pf[n * 2] = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
00719 synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
00720 }
00721 s->irdft.rdft_calc(&s->irdft, synth_pf);
00722 }
00723
00724
/* Add the tail cached from earlier blocks to the start of this block and
 * shift what is left of the cache down by `size`. */
00725 if (s->denoise_filter_cache_size) {
00726 lim = FFMIN(s->denoise_filter_cache_size, size);
00727 for (n = 0; n < lim; n++)
00728 synth_pf[n] += s->denoise_filter_cache[n];
00729 s->denoise_filter_cache_size -= lim;
00730 memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
00731 sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
00732 }
00733
00734
/* Store this block's tail (synth_pf[size .. size + remainder)) for later
 * blocks: accumulate onto still-cached entries, append the rest, and grow
 * the cache size if the new tail is longer. */
00735 if (fcb_type != FCB_TYPE_SILENCE) {
00736 lim = FFMIN(remainder, s->denoise_filter_cache_size);
00737 for (n = 0; n < lim; n++)
00738 s->denoise_filter_cache[n] += synth_pf[size + n];
00739 if (lim < remainder) {
00740 memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
00741 sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
00742 s->denoise_filter_cache_size = remainder;
00743 }
00744 }
00745 }
00746
00767 static void postfilter(WMAVoiceContext *s, const float *synth,
00768 float *samples, int size,
00769 const float *lpcs, float *zero_exc_pf,
00770 int fcb_type, int pitch)
00771 {
00772 float synth_filter_in_buf[MAX_FRAMESIZE / 2],
00773 *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
00774 *synth_filter_in = zero_exc_pf;
00775
00776 assert(size <= MAX_FRAMESIZE / 2);
00777
00778
00779 ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
00780
00781 if (fcb_type >= FCB_TYPE_AW_PULSES &&
00782 !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
00783 synth_filter_in = synth_filter_in_buf;
00784
00785
00786 ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
00787 synth_filter_in, size, s->lsps);
00788 memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
00789 sizeof(synth_pf[0]) * s->lsps);
00790
00791 wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
00792
00793 adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
00794 &s->postfilter_agc);
00795
00796 if (s->dc_level > 8) {
00797
00798
00799
00800 ff_acelp_apply_order_2_transfer_function(samples, samples,
00801 (const float[2]) { -1.99997, 1.0 },
00802 (const float[2]) { -1.9330735188, 0.93589198496 },
00803 0.93980580475, s->dcf_mem, size);
00804 }
00805 }
/**
 * Multi-stage vector dequantizer.
 *
 * The codebook tables of all stages are stored back to back in table.
 * Stage k holds sizes[k] vectors of num bytes each, and values[k] selects
 * one of them. Each selected byte is mapped to base_q[k] + mul_q[k] * byte
 * and the stages are summed per element.
 *
 * @param lsps     output, num entries (cleared first)
 * @param num      vector length
 * @param values   selected vector index per stage
 * @param sizes    number of vectors in each stage's codebook
 * @param n_stages number of stages
 * @param table    concatenated codebooks
 * @param mul_q    per-stage scale
 * @param base_q   per-stage offset
 */
static void dequant_lsps(double *lsps, int num,
                         const uint16_t *values,
                         const uint16_t *sizes,
                         int n_stages, const uint8_t *table,
                         const double *mul_q,
                         const double *base_q)
{
    int i;

    memset(lsps, 0, num * sizeof(*lsps));

    /* walk all per-stage arrays in lockstep */
    for (; n_stages > 0; n_stages--, values++, sizes++, mul_q++, base_q++) {
        const uint8_t *entry = table + *values * num;
        const double offset = *base_q, scale = *mul_q;

        for (i = 0; i < num; i++)
            lsps[i] += offset + scale * entry[i];

        /* skip this stage's whole codebook */
        table += *sizes * num;
    }
}
00841
00853 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
00854 {
00855 static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
00856 static const double mul_lsf[4] = {
00857 5.2187144800e-3, 1.4626986422e-3,
00858 9.6179549166e-4, 1.1325736225e-3
00859 };
00860 static const double base_lsf[4] = {
00861 M_PI * -2.15522e-1, M_PI * -6.1646e-2,
00862 M_PI * -3.3486e-2, M_PI * -5.7408e-2
00863 };
00864 uint16_t v[4];
00865
00866 v[0] = get_bits(gb, 8);
00867 v[1] = get_bits(gb, 6);
00868 v[2] = get_bits(gb, 5);
00869 v[3] = get_bits(gb, 5);
00870
00871 dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
00872 mul_lsf, base_lsf);
00873 }
00874
00879 static void dequant_lsp10r(GetBitContext *gb,
00880 double *i_lsps, const double *old,
00881 double *a1, double *a2, int q_mode)
00882 {
00883 static const uint16_t vec_sizes[3] = { 128, 64, 64 };
00884 static const double mul_lsf[3] = {
00885 2.5807601174e-3, 1.2354460219e-3, 1.1763821673e-3
00886 };
00887 static const double base_lsf[3] = {
00888 M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
00889 };
00890 const float (*ipol_tab)[2][10] = q_mode ?
00891 wmavoice_lsp10_intercoeff_b : wmavoice_lsp10_intercoeff_a;
00892 uint16_t interpol, v[3];
00893 int n;
00894
00895 dequant_lsp10i(gb, i_lsps);
00896
00897 interpol = get_bits(gb, 5);
00898 v[0] = get_bits(gb, 7);
00899 v[1] = get_bits(gb, 6);
00900 v[2] = get_bits(gb, 6);
00901
00902 for (n = 0; n < 10; n++) {
00903 double delta = old[n] - i_lsps[n];
00904 a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00905 a1[10 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00906 }
00907
00908 dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
00909 mul_lsf, base_lsf);
00910 }
00911
00915 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
00916 {
00917 static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
00918 static const double mul_lsf[5] = {
00919 3.3439586280e-3, 6.9908173703e-4,
00920 3.3216608306e-3, 1.0334960326e-3,
00921 3.1899104283e-3
00922 };
00923 static const double base_lsf[5] = {
00924 M_PI * -1.27576e-1, M_PI * -2.4292e-2,
00925 M_PI * -1.28094e-1, M_PI * -3.2128e-2,
00926 M_PI * -1.29816e-1
00927 };
00928 uint16_t v[5];
00929
00930 v[0] = get_bits(gb, 8);
00931 v[1] = get_bits(gb, 6);
00932 v[2] = get_bits(gb, 7);
00933 v[3] = get_bits(gb, 6);
00934 v[4] = get_bits(gb, 7);
00935
00936 dequant_lsps( lsps, 5, v, vec_sizes, 2,
00937 wmavoice_dq_lsp16i1, mul_lsf, base_lsf);
00938 dequant_lsps(&lsps[5], 5, &v[2], &vec_sizes[2], 2,
00939 wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
00940 dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
00941 wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
00942 }
00943
00948 static void dequant_lsp16r(GetBitContext *gb,
00949 double *i_lsps, const double *old,
00950 double *a1, double *a2, int q_mode)
00951 {
00952 static const uint16_t vec_sizes[3] = { 128, 128, 128 };
00953 static const double mul_lsf[3] = {
00954 1.2232979501e-3, 1.4062241527e-3, 1.6114744851e-3
00955 };
00956 static const double base_lsf[3] = {
00957 M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
00958 };
00959 const float (*ipol_tab)[2][16] = q_mode ?
00960 wmavoice_lsp16_intercoeff_b : wmavoice_lsp16_intercoeff_a;
00961 uint16_t interpol, v[3];
00962 int n;
00963
00964 dequant_lsp16i(gb, i_lsps);
00965
00966 interpol = get_bits(gb, 5);
00967 v[0] = get_bits(gb, 7);
00968 v[1] = get_bits(gb, 7);
00969 v[2] = get_bits(gb, 7);
00970
00971 for (n = 0; n < 16; n++) {
00972 double delta = old[n] - i_lsps[n];
00973 a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00974 a1[16 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00975 }
00976
00977 dequant_lsps( a2, 10, v, vec_sizes, 1,
00978 wmavoice_dq_lsp16r1, mul_lsf, base_lsf);
00979 dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
00980 wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
00981 dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
00982 wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
00983 }
00984
00998 static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb,
00999 const int *pitch)
01000 {
01001 static const int16_t start_offset[94] = {
01002 -11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9, 11,
01003 13, 15, 18, 17, 19, 20, 21, 22, 23, 24, 25, 26,
01004 27, 28, 29, 30, 31, 32, 33, 35, 37, 39, 41, 43,
01005 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,
01006 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91,
01007 93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, 115,
01008 117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
01009 141, 143, 145, 147, 149, 151, 153, 155, 157, 159
01010 };
01011 int bits, offset;
01012
01013
01014 s->aw_idx_is_ext = 0;
01015 if ((bits = get_bits(gb, 6)) >= 54) {
01016 s->aw_idx_is_ext = 1;
01017 bits += (bits - 54) * 3 + get_bits(gb, 2);
01018 }
01019
01020
01021
01022 s->aw_pulse_range = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
01023 for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
01024 s->aw_n_pulses[0] = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
01025 s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
01026 offset += s->aw_n_pulses[0] * pitch[0];
01027 s->aw_n_pulses[1] = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
01028 s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;
01029
01030
01031
01032
01033 if (start_offset[bits] < MAX_FRAMESIZE / 2) {
01034 while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
01035 s->aw_first_pulse_off[1] -= pitch[1];
01036 if (start_offset[bits] < 0)
01037 while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
01038 s->aw_first_pulse_off[0] -= pitch[0];
01039 }
01040 }
01041
/*
 * Add one extra pulse to fcb at a position not covered by the pulse ranges
 * used in aw_pulse_set1().
 *
 * Reads a position index of 5 - 2 * block_idx bits (4 bits if block 0 has
 * no pulses) followed by one sign bit. Appends exactly one pulse to
 * fcb->x/fcb->y unless the free-position mask runs empty, in which case it
 * returns early without adding a pulse, reading the sign bit or updating
 * s->aw_next_pulse_off_cache.
 *
 * s         - decoder context (aw_* fields)
 * gb        - bit reader
 * block_idx - 0 or 1
 * fcb       - pulse set to extend; fcb->pitch_lag is used as pulse spacing
 */
01049 static void aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb,
01050 int block_idx, AMRFixed *fcb)
01051 {
/* use_mask is a bit array over the MAX_FRAMESIZE / 2 = 80 block samples:
 * 5 words of 16 bits, sample idx being bit (0x8000 >> (idx & 15)) of word
 * idx >> 4. A set bit means "position still free". Two zeroed words of
 * padding on each side absorb writes of ranges that start before or end
 * after the block. */
01052 uint16_t use_mask_mem[9];
01053 uint16_t *use_mask = use_mask_mem + 2;
01054
01055
01056
01057
01058
01059
01060
01061 int pulse_off = s->aw_first_pulse_off[block_idx],
01062 pulse_start, n, idx, range, aidx, start_off = 0;
01063
01064
/* Advance the first pulse range until it reaches into the block. */
01065 if (s->aw_n_pulses[block_idx] > 0)
01066 while (pulse_off + s->aw_pulse_range < 1)
01067 pulse_off += fcb->pitch_lag;
01068
01069
/* Search range, matching the number of index bits read below: 32 or 8
 * positions when block 0 has pulses, otherwise 16. For block 1 the offset
 * cached by the block 0 call replaces pulse_off. */
01070 if (s->aw_n_pulses[0] > 0) {
01071 if (block_idx == 0) {
01072 range = 32;
01073 } else {
01074 range = 8;
01075 if (s->aw_n_pulses[block_idx] > 0)
01076 pulse_off = s->aw_next_pulse_off_cache;
01077 }
01078 } else
01079 range = 16;
01080 pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
01081
01082
01083
01084
/* Mark all 80 positions free, padding words not free, then clear
 * aw_pulse_range bits starting at every pitch-spaced pulse offset. A range
 * of 16 or 24 bits starting mid-word spans two or three words. */
01085 memset(&use_mask[-2], 0, 2 * sizeof(use_mask[0]));
01086 memset( use_mask, -1, 5 * sizeof(use_mask[0]));
01087 memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
01088 if (s->aw_n_pulses[block_idx] > 0)
01089 for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
01090 int excl_range = s->aw_pulse_range;
01091 uint16_t *use_mask_ptr = &use_mask[idx >> 4];
01092 int first_sh = 16 - (idx & 15);
01093 *use_mask_ptr++ &= 0xFFFFu << first_sh;
01094 excl_range -= first_sh;
01095 if (excl_range >= 16) {
01096 *use_mask_ptr++ = 0;
01097 *use_mask_ptr &= 0xFFFF >> (excl_range - 16);
01098 } else
01099 *use_mask_ptr &= 0xFFFF >> excl_range;
01100 }
01101
01102
/* Select the (aidx + 1)-th free position at or after pulse_start. A
 * negative candidate is first shifted into the block by whole pitch
 * periods; a candidate past the end of the block is replaced by the first
 * free position in the mask. Each selected position is marked used so it
 * is not counted twice. */
01103 aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
01104 for (n = 0; n <= aidx; pulse_start++) {
01105 for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
01106 if (idx >= MAX_FRAMESIZE / 2) {
01107 if (use_mask[0]) idx = 0x0F;
01108 else if (use_mask[1]) idx = 0x1F;
01109 else if (use_mask[2]) idx = 0x2F;
01110 else if (use_mask[3]) idx = 0x3F;
01111 else if (use_mask[4]) idx = 0x4F;
01112 else return;
/* idx is the last bit of the first non-empty word; step back by the
 * position of that word's highest set bit. */
01113 idx -= av_log2_16bit(use_mask[idx >> 4]);
01114 }
01115 if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
01116 use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
01117 n++;
01118 start_off = idx;
01119 }
01120 }
01121
/* Emit the pulse; a set sign bit means negative. */
01122 fcb->x[fcb->n] = start_off;
01123 fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
01124 fcb->n++;
01125
01126
/* Where the pitch-spaced continuation of this pulse lands in the next
 * block, relative to that block's start. */
01127 n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
01128 s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
01129 }
01130
01138 static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb,
01139 int block_idx, AMRFixed *fcb)
01140 {
01141 int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
01142 float v;
01143
01144 if (s->aw_n_pulses[block_idx] > 0) {
01145 int n, v_mask, i_mask, sh, n_pulses;
01146
01147 if (s->aw_pulse_range == 24) {
01148 n_pulses = 3;
01149 v_mask = 8;
01150 i_mask = 7;
01151 sh = 4;
01152 } else {
01153 n_pulses = 4;
01154 v_mask = 4;
01155 i_mask = 3;
01156 sh = 3;
01157 }
01158
01159 for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
01160 fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
01161 fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
01162 s->aw_first_pulse_off[block_idx];
01163 while (fcb->x[fcb->n] < 0)
01164 fcb->x[fcb->n] += fcb->pitch_lag;
01165 if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)
01166 fcb->n++;
01167 }
01168 } else {
01169 int num2 = (val & 0x1FF) >> 1, delta, idx;
01170
01171 if (num2 < 1 * 79) { delta = 1; idx = num2 + 1; }
01172 else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
01173 else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
01174 else { delta = 7; idx = num2 + 1 - 3 * 75; }
01175 v = (val & 0x200) ? -1.0 : 1.0;
01176
01177 fcb->no_repeat_mask |= 3 << fcb->n;
01178 fcb->x[fcb->n] = idx - delta;
01179 fcb->y[fcb->n] = v;
01180 fcb->x[fcb->n + 1] = idx;
01181 fcb->y[fcb->n + 1] = (val & 1) ? -v : v;
01182 fcb->n += 2;
01183 }
01184 }
01185
/*
 * Deterministic pseudo-random number derived from the frame counter and
 * block number; synth_block_hardcoded() uses it as the codebook offset for
 * FCB_TYPE_SILENCE blocks.
 *
 * frame_cntr - frame counter
 * block_num  - block index within the frame
 * block_size - block length in samples
 *
 * Returns z % (1000 - block_size), so the result is smaller than
 * 1000 - block_size.
 */
01199 static int pRNG(int frame_cntr, int block_num, int block_size)
01200 {
01201
01202
01203
01204
01205
01206
01207
01208
01209
01210
/* One row per value of y below: { multiplier for x, multiplier whose high
 * word product with x is added }. */
01211 static const unsigned int div_tbl[9][2] = {
01212 { 8332, 3 * 715827883U },
01213 { 4545, 0 * 390451573U },
01214 { 3124, 11 * 268435456U },
01215 { 2380, 15 * 204522253U },
01216 { 1922, 23 * 165191050U },
01217 { 1612, 23 * 138547333U },
01218 { 1388, 27 * 119304648U },
01219 { 1219, 16 * 104755300U },
01220 { 1086, 39 * 93368855U }
01221 };
/* Combine the inputs and fold the sum back once if it reaches 0xFFFF. */
01222 unsigned int z, y, x = MUL16(block_num, 1877) + frame_cntr;
01223 if (x >= 0xFFFF) x -= 0xFFFF;
01224
/* NOTE(review): presumably y = x % 9 computed through a multiply-high by
 * a reciprocal constant (MULH is defined elsewhere); y must end up in
 * 0..8 because it indexes div_tbl -- confirm. */
01225 y = x - 9 * MULH(477218589, x);
/* Only the low 16 bits of the combination are kept. */
01226 z = (uint16_t) (x * div_tbl[y][0] + UMULH(x, div_tbl[y][1]));
01227
01228 return z % (1000 - block_size);
01229 }
01230
01235 static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb,
01236 int block_idx, int size,
01237 const struct frame_type_desc *frame_desc,
01238 float *excitation)
01239 {
01240 float gain;
01241 int n, r_idx;
01242
01243 assert(size <= MAX_FRAMESIZE);
01244
01245
01246 if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01247 r_idx = pRNG(s->frame_cntr, block_idx, size);
01248 gain = s->silence_gain;
01249 } else {
01250 r_idx = get_bits(gb, 8);
01251 gain = wmavoice_gain_universal[get_bits(gb, 6)];
01252 }
01253
01254
01255 memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
01256
01257
01258 for (n = 0; n < size; n++)
01259 excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
01260 }
01261
/*
 * Generate the excitation of a block from the fixed codebook (pulses) and
 * the adaptive codebook (pitch-delayed copy of earlier excitation).
 *
 * s               - decoder context
 * gb              - bit reader
 * block_idx       - index of the block within the frame
 * size            - block length, at most MAX_FRAMESIZE / 2
 * block_pitch_sh2 - block pitch in quarter samples (integer part
 *                   block_pitch_sh2 >> 2, fraction block_pitch_sh2 & 3)
 * frame_desc      - description of the frame type
 * excitation      - in/out: receives `size` samples; samples before
 *                   excitation[0] are read as pitch history
 */
01266 static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,
01267 int block_idx, int size,
01268 int block_pitch_sh2,
01269 const struct frame_type_desc *frame_desc,
01270 float *excitation)
01271 {
/* Weights applied to the gain prediction history. */
01272 static const float gain_coeff[6] = {
01273 0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
01274 };
01275 float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
01276 int n, idx, gain_weight;
01277 AMRFixed fcb;
01278
01279 assert(size <= MAX_FRAMESIZE / 2);
01280 memset(pulses, 0, sizeof(*pulses) * size);
01281
01282 fcb.pitch_lag = block_pitch_sh2 >> 2;
01283 fcb.pitch_fac = 1.0;
01284 fcb.no_repeat_mask = 0;
01285 fcb.n = 0;
01286
01287
01288
/* Fixed codebook: collect pulse positions and signs into fcb. */
01289 if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01290 aw_pulse_set1(s, gb, block_idx, &fcb);
01291 aw_pulse_set2(s, gb, block_idx, &fcb);
01292 } else {
/* Bits per pulse position: fewer bits for shorter blocks. */
01293 int offset_nbits = 5 - frame_desc->log_n_blocks;
01294
01295 fcb.no_repeat_mask = -1;
01296
01297
/* Five interleaved tracks (positions n, n + 5, n + 10, ...). Each track
 * reads a sign bit and a position; the first dbl_pulses tracks read a
 * second position whose sign is flipped when it lies after the first. */
01298 for (n = 0; n < 5; n++) {
01299 float sign;
01300 int pos1, pos2;
01301
01302 sign = get_bits1(gb) ? 1.0 : -1.0;
01303 pos1 = get_bits(gb, offset_nbits);
01304 fcb.x[fcb.n] = n + 5 * pos1;
01305 fcb.y[fcb.n++] = sign;
01306 if (n < frame_desc->dbl_pulses) {
01307 pos2 = get_bits(gb, offset_nbits);
01308 fcb.x[fcb.n] = n + 5 * pos2;
01309 fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
01310 }
01311 }
01312 }
01313 ff_set_fixed_vector(pulses, &fcb, 1.0, size);
01314
01315
01316
/* One 7-bit index selects both gains. The fixed codebook gain is
 * exp(prediction from history - constant + codebook value); the codebook
 * value, clipped to a fixed range, becomes the new history entry. */
01317 idx = get_bits(gb, 7);
01318 fcb_gain = expf(ff_dot_productf(s->gain_pred_err, gain_coeff, 6) -
01319 5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
01320 acb_gain = wmavoice_gain_codebook_acb[idx];
01321 pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
01322 -2.9957322736 ,
01323 1.6094379124 );
01324
/* Shift the history by 8 >> log_n_blocks entries (4, 2 or 1 for 2, 4 or 8
 * blocks per frame) and fill the freed front entries with pred_err. */
01325 gain_weight = 8 >> frame_desc->log_n_blocks;
01326 memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
01327 sizeof(*s->gain_pred_err) * (6 - gain_weight));
01328 for (n = 0; n < gain_weight; n++)
01329 s->gain_pred_err[n] = pred_err;
01330
01331
/* Adaptive codebook. */
01332 if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
/* The pitch changes linearly by pitch_diff_sh16 (16.16 fixed point) per
 * sample, counted from the start of the frame. The block is processed in
 * runs of `len` samples over which the interpolation index `idx` is held
 * constant. */
01333 int len;
01334 for (n = 0; n < size; n += len) {
01335 int next_idx_sh16;
01336 int abs_idx = block_idx * size + n;
01337 int pitch_sh16 = (s->last_pitch_val << 16) +
01338 s->pitch_diff_sh16 * abs_idx;
01339 int pitch = (pitch_sh16 + 0x6FFF) >> 16;
01340 int idx_sh16 = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
01341 idx = idx_sh16 >> 16;
01342 if (s->pitch_diff_sh16) {
/* Distance to the next integer boundary of idx in the direction of
 * travel, converted to a sample count and clamped to [1, size - n]. */
01343 if (s->pitch_diff_sh16 > 0) {
01344 next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
01345 } else
01346 next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
01347 len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
01348 1, size - n);
01349 } else
01350 len = size;
01351
01352 ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
01353 wmavoice_ipol1_coeffs, 17,
01354 idx, 9, len);
01355 }
01356 } else {
/* Constant pitch for the whole block: interpolate when the pitch has a
 * fractional part, otherwise copy from block_pitch samples back (the
 * source and destination ranges may overlap). */
01357 int block_pitch = block_pitch_sh2 >> 2;
01358 idx = block_pitch_sh2 & 3;
01359 if (idx) {
01360 ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
01361 wmavoice_ipol2_coeffs, 4,
01362 idx, 8, size);
01363 } else
01364 av_memcpy_backptr((uint8_t *) excitation, sizeof(float) * block_pitch,
01365 sizeof(float) * size);
01366 }
01367
01368
/* excitation = acb_gain * adaptive part + fcb_gain * pulses, in place. */
01369 ff_weighted_vector_sumf(excitation, excitation, pulses,
01370 acb_gain, fcb_gain, size);
01371 }
01372
01389 static void synth_block(WMAVoiceContext *s, GetBitContext *gb,
01390 int block_idx, int size,
01391 int block_pitch_sh2,
01392 const double *lsps, const double *prev_lsps,
01393 const struct frame_type_desc *frame_desc,
01394 float *excitation, float *synth)
01395 {
01396 double i_lsps[MAX_LSPS];
01397 float lpcs[MAX_LSPS];
01398 float fac;
01399 int n;
01400
01401 if (frame_desc->acb_type == ACB_TYPE_NONE)
01402 synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
01403 else
01404 synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
01405 frame_desc, excitation);
01406
01407
01408 fac = (block_idx + 0.5) / frame_desc->n_blocks;
01409 for (n = 0; n < s->lsps; n++)
01410 i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
01411 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01412
01413
01414 ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
01415 }
01416
01432 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
01433 float *samples,
01434 const double *lsps, const double *prev_lsps,
01435 float *excitation, float *synth)
01436 {
01437 WMAVoiceContext *s = ctx->priv_data;
01438 int n, n_blocks_x2, log_n_blocks_x2, cur_pitch_val;
01439 int pitch[MAX_BLOCKS], last_block_pitch;
01440
01441
01442 int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)], block_nsamples;
01443
01444 if (bd_idx < 0) {
01445 av_log(ctx, AV_LOG_ERROR,
01446 "Invalid frame type VLC code, skipping\n");
01447 return -1;
01448 }
01449
01450 block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
01451
01452
01453 if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
01454
01455
01456
01457
01458 n_blocks_x2 = frame_descs[bd_idx].n_blocks << 1;
01459 log_n_blocks_x2 = frame_descs[bd_idx].log_n_blocks + 1;
01460 cur_pitch_val = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
01461 cur_pitch_val = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
01462 if (s->last_acb_type == ACB_TYPE_NONE ||
01463 20 * abs(cur_pitch_val - s->last_pitch_val) >
01464 (cur_pitch_val + s->last_pitch_val))
01465 s->last_pitch_val = cur_pitch_val;
01466
01467
01468 for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01469 int fac = n * 2 + 1;
01470
01471 pitch[n] = (MUL16(fac, cur_pitch_val) +
01472 MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
01473 frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
01474 }
01475
01476
01477 s->pitch_diff_sh16 =
01478 ((cur_pitch_val - s->last_pitch_val) << 16) / MAX_FRAMESIZE;
01479 }
01480
01481
01482 switch (frame_descs[bd_idx].fcb_type) {
01483 case FCB_TYPE_SILENCE:
01484 s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
01485 break;
01486 case FCB_TYPE_AW_PULSES:
01487 aw_parse_coords(s, gb, pitch);
01488 break;
01489 }
01490
01491 for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01492 int bl_pitch_sh2;
01493
01494
01495 switch (frame_descs[bd_idx].acb_type) {
01496 case ACB_TYPE_HAMMING: {
01497
01498
01499
01500
01501
01502 int block_pitch,
01503 t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
01504 t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
01505 t3 = s->block_conv_table[3] - s->block_conv_table[2] + 1;
01506
01507 if (n == 0) {
01508 block_pitch = get_bits(gb, s->block_pitch_nbits);
01509 } else
01510 block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
01511 get_bits(gb, s->block_delta_pitch_nbits);
01512
01513 last_block_pitch = av_clip(block_pitch,
01514 s->block_delta_pitch_hrange,
01515 s->block_pitch_range -
01516 s->block_delta_pitch_hrange);
01517
01518
01519 if (block_pitch < t1) {
01520 bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
01521 } else {
01522 block_pitch -= t1;
01523 if (block_pitch < t2) {
01524 bl_pitch_sh2 =
01525 (s->block_conv_table[1] << 2) + (block_pitch << 1);
01526 } else {
01527 block_pitch -= t2;
01528 if (block_pitch < t3) {
01529 bl_pitch_sh2 =
01530 (s->block_conv_table[2] + block_pitch) << 2;
01531 } else
01532 bl_pitch_sh2 = s->block_conv_table[3] << 2;
01533 }
01534 }
01535 pitch[n] = bl_pitch_sh2 >> 2;
01536 break;
01537 }
01538
01539 case ACB_TYPE_ASYMMETRIC: {
01540 bl_pitch_sh2 = pitch[n] << 2;
01541 break;
01542 }
01543
01544 default:
01545 bl_pitch_sh2 = 0;
01546 break;
01547 }
01548
01549 synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
01550 lsps, prev_lsps, &frame_descs[bd_idx],
01551 &excitation[n * block_nsamples],
01552 &synth[n * block_nsamples]);
01553 }
01554
01555
01556
01557 if (s->do_apf) {
01558 double i_lsps[MAX_LSPS];
01559 float lpcs[MAX_LSPS];
01560
01561 for (n = 0; n < s->lsps; n++)
01562 i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
01563 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01564 postfilter(s, synth, samples, 80, lpcs,
01565 &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
01566 frame_descs[bd_idx].fcb_type, pitch[0]);
01567
01568 for (n = 0; n < s->lsps; n++)
01569 i_lsps[n] = cos(lsps[n]);
01570 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01571 postfilter(s, &synth[80], &samples[80], 80, lpcs,
01572 &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
01573 frame_descs[bd_idx].fcb_type, pitch[0]);
01574 } else
01575 memcpy(samples, synth, 160 * sizeof(synth[0]));
01576
01577
01578 s->frame_cntr++;
01579 if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF;
01580 s->last_acb_type = frame_descs[bd_idx].acb_type;
01581 switch (frame_descs[bd_idx].acb_type) {
01582 case ACB_TYPE_NONE:
01583 s->last_pitch_val = 0;
01584 break;
01585 case ACB_TYPE_ASYMMETRIC:
01586 s->last_pitch_val = cur_pitch_val;
01587 break;
01588 case ACB_TYPE_HAMMING:
01589 s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
01590 break;
01591 }
01592
01593 return 0;
01594 }
01595
/**
 * Force a set of LSPs into a usable shape: bounded, with a minimum spacing
 * between neighbours, and sorted in ascending order.
 *
 * @param lsps array of LSPs, modified in place
 * @param num  number of entries in lsps
 */
static void stabilize_lsps(double *lsps, int num)
{
    const double lowest  = 0.0015 * M_PI;
    const double min_gap = 0.0125 * M_PI;
    const double highest = 0.9985 * M_PI;
    int i, j, k;

    /* Raise the first value to the lower bound, push every following value
     * at least min_gap above its predecessor, then cap the last value at
     * the upper bound. */
    if (!(lsps[0] > lowest))
        lsps[0] = lowest;
    for (i = 1; i < num; i++) {
        double floor_val = lsps[i - 1] + min_gap;

        if (!(lsps[i] > floor_val))
            lsps[i] = floor_val;
    }
    if (lsps[num - 1] > highest)
        lsps[num - 1] = highest;

    /* Capping the last value may have broken the ordering; look for the
     * first descending pair. */
    i = 1;
    while (i < num && !(lsps[i] < lsps[i - 1]))
        i++;
    if (i >= num)
        return; /* already ascending */

    /* Out of order: insertion-sort the whole array. */
    for (j = 1; j < num; j++) {
        double key = lsps[j];

        k = j;
        while (k > 0 && !(lsps[k - 1] <= key)) {
            lsps[k] = lsps[k - 1];
            k--;
        }
        lsps[k] = key;
    }
}
01636
01646 static int check_bits_for_superframe(GetBitContext *orig_gb,
01647 WMAVoiceContext *s)
01648 {
01649 GetBitContext s_gb, *gb = &s_gb;
01650 int n, need_bits, bd_idx;
01651 const struct frame_type_desc *frame_desc;
01652
01653
01654 init_get_bits(gb, orig_gb->buffer, orig_gb->size_in_bits);
01655 skip_bits_long(gb, get_bits_count(orig_gb));
01656 assert(get_bits_left(gb) == get_bits_left(orig_gb));
01657
01658
01659 if (get_bits_left(gb) < 14)
01660 return 1;
01661 if (!get_bits1(gb))
01662 return -1;
01663 if (get_bits1(gb)) skip_bits(gb, 12);
01664 if (s->has_residual_lsps) {
01665 if (get_bits_left(gb) < s->sframe_lsp_bitsize)
01666 return 1;
01667 skip_bits_long(gb, s->sframe_lsp_bitsize);
01668 }
01669
01670
01671 for (n = 0; n < MAX_FRAMES; n++) {
01672 int aw_idx_is_ext = 0;
01673
01674 if (!s->has_residual_lsps) {
01675 if (get_bits_left(gb) < s->frame_lsp_bitsize) return 1;
01676 skip_bits_long(gb, s->frame_lsp_bitsize);
01677 }
01678 bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)];
01679 if (bd_idx < 0)
01680 return -1;
01681 frame_desc = &frame_descs[bd_idx];
01682 if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01683 if (get_bits_left(gb) < s->pitch_nbits)
01684 return 1;
01685 skip_bits_long(gb, s->pitch_nbits);
01686 }
01687 if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01688 skip_bits(gb, 8);
01689 } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01690 int tmp = get_bits(gb, 6);
01691 if (tmp >= 0x36) {
01692 skip_bits(gb, 2);
01693 aw_idx_is_ext = 1;
01694 }
01695 }
01696
01697
01698 if (frame_desc->acb_type == ACB_TYPE_HAMMING) {
01699 need_bits = s->block_pitch_nbits +
01700 (frame_desc->n_blocks - 1) * s->block_delta_pitch_nbits;
01701 } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01702 need_bits = 2 * !aw_idx_is_ext;
01703 } else
01704 need_bits = 0;
01705 need_bits += frame_desc->frame_size;
01706 if (get_bits_left(gb) < need_bits)
01707 return 1;
01708 skip_bits_long(gb, need_bits);
01709 }
01710
01711 return 0;
01712 }
01713
01731 static int synth_superframe(AVCodecContext *ctx, int *got_frame_ptr)
01732 {
01733 WMAVoiceContext *s = ctx->priv_data;
01734 GetBitContext *gb = &s->gb, s_gb;
01735 int n, res, n_samples = 480;
01736 double lsps[MAX_FRAMES][MAX_LSPS];
01737 const double *mean_lsf = s->lsps == 16 ?
01738 wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];
01739 float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
01740 float synth[MAX_LSPS + MAX_SFRAMESIZE];
01741 float *samples;
01742
01743 memcpy(synth, s->synth_history,
01744 s->lsps * sizeof(*synth));
01745 memcpy(excitation, s->excitation_history,
01746 s->history_nsamples * sizeof(*excitation));
01747
01748 if (s->sframe_cache_size > 0) {
01749 gb = &s_gb;
01750 init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
01751 s->sframe_cache_size = 0;
01752 }
01753
01754 if ((res = check_bits_for_superframe(gb, s)) == 1) {
01755 *got_frame_ptr = 0;
01756 return 1;
01757 }
01758
01759
01760
01761
01762
01763 if (!get_bits1(gb)) {
01764 av_log_missing_feature(ctx, "WMAPro-in-WMAVoice support", 1);
01765 return -1;
01766 }
01767
01768
01769 if (get_bits1(gb)) {
01770 if ((n_samples = get_bits(gb, 12)) > 480) {
01771 av_log(ctx, AV_LOG_ERROR,
01772 "Superframe encodes >480 samples (%d), not allowed\n",
01773 n_samples);
01774 return -1;
01775 }
01776 }
01777
01778 if (s->has_residual_lsps) {
01779 double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
01780
01781 for (n = 0; n < s->lsps; n++)
01782 prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
01783
01784 if (s->lsps == 10) {
01785 dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01786 } else
01787 dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01788
01789 for (n = 0; n < s->lsps; n++) {
01790 lsps[0][n] = mean_lsf[n] + (a1[n] - a2[n * 2]);
01791 lsps[1][n] = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
01792 lsps[2][n] += mean_lsf[n];
01793 }
01794 for (n = 0; n < 3; n++)
01795 stabilize_lsps(lsps[n], s->lsps);
01796 }
01797
01798
01799 s->frame.nb_samples = 480;
01800 if ((res = ctx->get_buffer(ctx, &s->frame)) < 0) {
01801 av_log(ctx, AV_LOG_ERROR, "get_buffer() failed\n");
01802 return res;
01803 }
01804 s->frame.nb_samples = n_samples;
01805 samples = (float *)s->frame.data[0];
01806
01807
01808 for (n = 0; n < 3; n++) {
01809 if (!s->has_residual_lsps) {
01810 int m;
01811
01812 if (s->lsps == 10) {
01813 dequant_lsp10i(gb, lsps[n]);
01814 } else
01815 dequant_lsp16i(gb, lsps[n]);
01816
01817 for (m = 0; m < s->lsps; m++)
01818 lsps[n][m] += mean_lsf[m];
01819 stabilize_lsps(lsps[n], s->lsps);
01820 }
01821
01822 if ((res = synth_frame(ctx, gb, n,
01823 &samples[n * MAX_FRAMESIZE],
01824 lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
01825 &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
01826 &synth[s->lsps + n * MAX_FRAMESIZE]))) {
01827 *got_frame_ptr = 0;
01828 return res;
01829 }
01830 }
01831
01832
01833
01834
01835 if (get_bits1(gb)) {
01836 res = get_bits(gb, 4);
01837 skip_bits(gb, 10 * (res + 1));
01838 }
01839
01840 *got_frame_ptr = 1;
01841
01842
01843 memcpy(s->prev_lsps, lsps[2],
01844 s->lsps * sizeof(*s->prev_lsps));
01845 memcpy(s->synth_history, &synth[MAX_SFRAMESIZE],
01846 s->lsps * sizeof(*synth));
01847 memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
01848 s->history_nsamples * sizeof(*excitation));
01849 if (s->do_apf)
01850 memmove(s->zero_exc_pf, &s->zero_exc_pf[MAX_SFRAMESIZE],
01851 s->history_nsamples * sizeof(*s->zero_exc_pf));
01852
01853 return 0;
01854 }
01855
01863 static int parse_packet_header(WMAVoiceContext *s)
01864 {
01865 GetBitContext *gb = &s->gb;
01866 unsigned int res;
01867
01868 if (get_bits_left(gb) < 11)
01869 return 1;
01870 skip_bits(gb, 4);
01871 s->has_residual_lsps = get_bits1(gb);
01872 do {
01873 res = get_bits(gb, 6);
01874
01875 if (get_bits_left(gb) < 6 * (res == 0x3F) + s->spillover_bitsize)
01876 return 1;
01877 } while (res == 0x3F);
01878 s->spillover_nbits = get_bits(gb, s->spillover_bitsize);
01879
01880 return 0;
01881 }
01882
01898 static void copy_bits(PutBitContext *pb,
01899 const uint8_t *data, int size,
01900 GetBitContext *gb, int nbits)
01901 {
01902 int rmn_bytes, rmn_bits;
01903
01904 rmn_bits = rmn_bytes = get_bits_left(gb);
01905 if (rmn_bits < nbits)
01906 return;
01907 if (nbits > pb->size_in_bits - put_bits_count(pb))
01908 return;
01909 rmn_bits &= 7; rmn_bytes >>= 3;
01910 if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
01911 put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
01912 avpriv_copy_bits(pb, data + size - rmn_bytes,
01913 FFMIN(nbits - rmn_bits, rmn_bytes << 3));
01914 }
01915
01927 static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
01928 int *got_frame_ptr, AVPacket *avpkt)
01929 {
01930 WMAVoiceContext *s = ctx->priv_data;
01931 GetBitContext *gb = &s->gb;
01932 int size, res, pos;
01933
01934
01935
01936
01937
01938
01939 for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
01940 if (!size) {
01941 *got_frame_ptr = 0;
01942 return 0;
01943 }
01944 init_get_bits(&s->gb, avpkt->data, size << 3);
01945
01946
01947
01948
01949 if (size == ctx->block_align) {
01950 if ((res = parse_packet_header(s)) < 0)
01951 return res;
01952
01953
01954
01955
01956 if (s->spillover_nbits > 0) {
01957 if (s->sframe_cache_size > 0) {
01958 int cnt = get_bits_count(gb);
01959 copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);
01960 flush_put_bits(&s->pb);
01961 s->sframe_cache_size += s->spillover_nbits;
01962 if ((res = synth_superframe(ctx, got_frame_ptr)) == 0 &&
01963 *got_frame_ptr) {
01964 cnt += s->spillover_nbits;
01965 s->skip_bits_next = cnt & 7;
01966 *(AVFrame *)data = s->frame;
01967 return cnt >> 3;
01968 } else
01969 skip_bits_long (gb, s->spillover_nbits - cnt +
01970 get_bits_count(gb));
01971 } else
01972 skip_bits_long(gb, s->spillover_nbits);
01973 }
01974 } else if (s->skip_bits_next)
01975 skip_bits(gb, s->skip_bits_next);
01976
01977
01978 s->sframe_cache_size = 0;
01979 s->skip_bits_next = 0;
01980 pos = get_bits_left(gb);
01981 if ((res = synth_superframe(ctx, got_frame_ptr)) < 0) {
01982 return res;
01983 } else if (*got_frame_ptr) {
01984 int cnt = get_bits_count(gb);
01985 s->skip_bits_next = cnt & 7;
01986 *(AVFrame *)data = s->frame;
01987 return cnt >> 3;
01988 } else if ((s->sframe_cache_size = pos) > 0) {
01989
01990 init_get_bits(gb, avpkt->data, size << 3);
01991 skip_bits_long(gb, (size << 3) - pos);
01992 assert(get_bits_left(gb) == pos);
01993
01994
01995 init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
01996 copy_bits(&s->pb, avpkt->data, size, gb, s->sframe_cache_size);
01997
01998
01999 }
02000
02001 return size;
02002 }
02003
02004 static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
02005 {
02006 WMAVoiceContext *s = ctx->priv_data;
02007
02008 if (s->do_apf) {
02009 ff_rdft_end(&s->rdft);
02010 ff_rdft_end(&s->irdft);
02011 ff_dct_end(&s->dct);
02012 ff_dct_end(&s->dst);
02013 }
02014
02015 return 0;
02016 }
02017
02018 static av_cold void wmavoice_flush(AVCodecContext *ctx)
02019 {
02020 WMAVoiceContext *s = ctx->priv_data;
02021 int n;
02022
02023 s->postfilter_agc = 0;
02024 s->sframe_cache_size = 0;
02025 s->skip_bits_next = 0;
02026 for (n = 0; n < s->lsps; n++)
02027 s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
02028 memset(s->excitation_history, 0,
02029 sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
02030 memset(s->synth_history, 0,
02031 sizeof(*s->synth_history) * MAX_LSPS);
02032 memset(s->gain_pred_err, 0,
02033 sizeof(s->gain_pred_err));
02034
02035 if (s->do_apf) {
02036 memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
02037 sizeof(*s->synth_filter_out_buf) * s->lsps);
02038 memset(s->dcf_mem, 0,
02039 sizeof(*s->dcf_mem) * 2);
02040 memset(s->zero_exc_pf, 0,
02041 sizeof(*s->zero_exc_pf) * s->history_nsamples);
02042 memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
02043 }
02044 }
02045
02046 AVCodec ff_wmavoice_decoder = {
02047 .name = "wmavoice",
02048 .type = AVMEDIA_TYPE_AUDIO,
02049 .id = CODEC_ID_WMAVOICE,
02050 .priv_data_size = sizeof(WMAVoiceContext),
02051 .init = wmavoice_decode_init,
02052 .close = wmavoice_decode_end,
02053 .decode = wmavoice_decode_packet,
02054 .capabilities = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1,
02055 .flush = wmavoice_flush,
02056 .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
02057 };