00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00028 #define UNCHECKED_BITSTREAM_READER 1
00029
#include <limits.h>
#include <math.h>
#include "avcodec.h"
#include "get_bits.h"
#include "put_bits.h"
#include "wmavoice_data.h"
#include "celp_math.h"
#include "celp_filters.h"
#include "acelp_vectors.h"
#include "acelp_filters.h"
#include "lsp.h"
#include "libavutil/lzo.h"
#include "dct.h"
#include "rdft.h"
#include "sinewin.h"
00044
/* Largest number of blocks a frame is split into (see frame_descs[]). */
#define MAX_BLOCKS 8
/* Largest LSP/LPC order handled; the decoder uses either 10 or 16. */
#define MAX_LSPS 16
/* Room reserved in front of the postfilter output buffer for filter
 * history; same value as MAX_LSPS (name suggests it is kept a multiple
 * of 16 for alignment - TODO confirm). */
#define MAX_LSPS_ALIGN16 16

/* Number of frames per superframe (used to size MAX_SFRAMESIZE). */
#define MAX_FRAMES 3
/* Number of samples in one frame; block size is this divided by n_blocks. */
#define MAX_FRAMESIZE 160
/* Upper bound for history_nsamples (max_pitch_val + 8), checked at init. */
#define MAX_SIGNAL_HISTORY 416
/* Number of samples in a full superframe. */
#define MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES)
/* Size in bytes (before padding) of WMAVoiceContext.sframe_cache. */
#define SFRAME_CACHE_MAXSIZE 256

/* Lookup-table width in bits handed to INIT_VLC_STATIC for frame_type_vlc. */
#define VLC_NBITS 6
00057
00058
/* VLC table for the per-frame type code. It is built in decode_vbmtree()
 * and read in synth_frame(); the decoded index is mapped through
 * WMAVoiceContext.vbm_tree to an entry of frame_descs[]. */
static VLC frame_type_vlc;
00062
/* Adaptive codebook (pitch-predicted excitation) variants, stored in
 * frame_type_desc.acb_type. */
enum {
    /* No adaptive codebook: the block is synthesised by
     * synth_block_hardcoded(). */
    ACB_TYPE_NONE = 0,
    /* Pitch is re-evaluated along the block (last_pitch_val plus
     * pitch_diff_sh16 per sample) and the history is interpolated with
     * wmavoice_ipol1_coeffs. */
    ACB_TYPE_ASYMMETRIC = 1,
    /* One quarter-sample-resolution pitch value per block
     * (block_pitch_sh2); fractional lags are interpolated with
     * wmavoice_ipol2_coeffs, integer lags are a plain history copy. */
    ACB_TYPE_HAMMING = 2
};
00077
/* Fixed codebook (innovation) variants, stored in
 * frame_type_desc.fcb_type. */
enum {
    /* Nothing is read from the bitstream: the codebook offset comes from
     * pRNG() and the gain from WMAVoiceContext.silence_gain. */
    FCB_TYPE_SILENCE = 0,
    /* Codebook offset (8 bits) and gain index (6 bits) are read from the
     * bitstream; samples come from wmavoice_std_codebook. */
    FCB_TYPE_HARDCODED = 1,
    /* Pulses are decoded by aw_pulse_set1() and aw_pulse_set2(). */
    FCB_TYPE_AW_PULSES = 2,
    /* Five interleaved pulse tracks are read directly in
     * synth_block_fcb_acb(); the first dbl_pulses tracks carry two pulses. */
    FCB_TYPE_EXC_PULSES = 3,
};
00093
/* Static description of each of the 17 frame types. The index into this
 * table is obtained from WMAVoiceContext.vbm_tree in synth_frame(). */
static const struct frame_type_desc {
    uint8_t n_blocks;      /* number of blocks the frame is split into;
                            * block size is MAX_FRAMESIZE / n_blocks */
    uint8_t log_n_blocks;  /* log2(n_blocks) */
    uint8_t acb_type;      /* adaptive codebook type, one of ACB_TYPE_* */
    uint8_t fcb_type;      /* fixed codebook type, one of FCB_TYPE_* */
    uint8_t dbl_pulses;    /* for FCB_TYPE_EXC_PULSES: how many of the five
                            * pulse tracks carry a second pulse */
    uint16_t frame_size;   /* not used in the visible code; presumably the
                            * coded size of the frame in bits - TODO confirm */
} frame_descs[17] = {
    { 1, 0, ACB_TYPE_NONE,       FCB_TYPE_SILENCE,    0,   0 },
    { 2, 1, ACB_TYPE_NONE,       FCB_TYPE_HARDCODED,  0,  28 },
    { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES,  0,  46 },
    { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2,  80 },
    { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 104 },
    { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0, 108 },
    { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 132 },
    { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 168 },
    { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0,  64 },
    { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2,  80 },
    { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 104 },
    { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0, 108 },
    { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2, 132 },
    { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 168 },
    { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0, 176 },
    { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2, 208 },
    { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 256 }
};
00127
/* Decoder state. Fields set in wmavoice_decode_init() are constant for
 * the stream; the rest is updated while decoding. Fields marked
 * "not referenced here" are not touched by the part of the file that was
 * available when these comments were written. */
typedef struct {
    /* ---- global values, set at init ---- */
    AVFrame frame;                /* registered as ctx->coded_frame at init */
    GetBitContext gb;             /* bit reader; at init it parses the
                                   * extradata starting at byte 22 */

    int8_t vbm_tree[25];          /* maps the frame-type VLC index to an
                                   * index into frame_descs[]; entries left
                                   * at -1 are invalid */
    int spillover_bitsize;        /* 3 + ceil(log2(block_align)) */
    int history_nsamples;         /* max_pitch_val + 8; must not exceed
                                   * MAX_SIGNAL_HISTORY */

    /* postfilter configuration, all taken from the extradata flags */
    int do_apf;                   /* flags bit 0; when set, the transforms
                                   * and sin/cos tables below are set up */
    int denoise_strength;         /* flags bits 2..5, valid range 0..11;
                                   * row of wmavoice_denoise_power_table */
    int denoise_tilt_corr;        /* flags bit 6; enables tilt compensation
                                   * in calc_input_response() */
    int dc_level;                 /* flags bits 7..10; values above 8 enable
                                   * the order-2 filter in postfilter() */

    int lsps;                     /* LSP/LPC order: 16 if flags bit 12 is
                                   * set, else 10 */
    int lsp_q_mode;               /* flags bit 13 (not referenced here) */
    int lsp_def_mode;             /* flags bit 14 (not referenced here) */
    int frame_lsp_bitsize;        /* 24 (10 LSPs) or 34 (16 LSPs) */
    int sframe_lsp_bitsize;       /* 48 (10 LSPs) or 60 (16 LSPs) */

    /* pitch coding parameters, derived from the sample rate at init */
    int min_pitch_val;            /* smallest pitch lag, in samples */
    int max_pitch_val;            /* largest pitch lag, in samples */
    int pitch_nbits;              /* ceil(log2(max - min pitch)) */
    int block_pitch_nbits;        /* ceil(log2(block_pitch_range)) */
    int block_pitch_range;        /* range computed from block_conv_table */
    int block_delta_pitch_nbits;  /* 1 + ceil(log2(block_delta_pitch_hrange)) */
    int block_delta_pitch_hrange; /* (pitch_range >> 3) rounded down to a
                                   * multiple of 16; must be > 0 */
    uint16_t block_conv_table[4]; /* { min_pitch_val, pitch_range*25/64,
                                   *   pitch_range*44/64, max_pitch_val-1 } */

    /* ---- bitstream bookkeeping (not referenced here) ---- */
    int spillover_nbits;
    int has_residual_lsps;
    int skip_bits_next;
    uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + FF_INPUT_BUFFER_PADDING_SIZE];
    int sframe_cache_size;
    PutBitContext pb;

    /* ---- synthesis state ---- */
    double prev_lsps[MAX_LSPS];   /* LSPs of the previous frame; initialised
                                   * to evenly spaced values in (0, pi) */
    int last_pitch_val;           /* pitch the ACB_TYPE_ASYMMETRIC
                                   * interpolation starts from; 40 at init */
    int last_acb_type;            /* ACB_TYPE_NONE at init */
    int pitch_diff_sh16;          /* per-sample pitch change, 16.16 fixed
                                   * point (added to last_pitch_val << 16) */
    float silence_gain;           /* gain used for FCB_TYPE_SILENCE blocks */

    /* adaptive-window pulse layout, filled in by aw_parse_coords() */
    int aw_idx_is_ext;            /* set when the 6-bit index was >= 54 and
                                   * two extension bits were read */
    int aw_pulse_range;           /* 24 if both pitches exceed 32, else 16 */
    int aw_n_pulses[2];           /* pulse count for each of the two blocks */
    int aw_first_pulse_off[2];    /* first pulse offset for each block */
    int aw_next_pulse_off_cache;  /* offset carried from block 0 to block 1
                                   * by aw_pulse_set2() */

    int frame_cntr;               /* input to pRNG() for silence frames */
    float gain_pred_err[6];       /* gain prediction error history; newest
                                   * first; cleared by hardcoded blocks */
    float excitation_history[MAX_SIGNAL_HISTORY]; /* not referenced here */
    float synth_history[MAX_LSPS];                /* not referenced here */

    /* ---- postfilter state ---- */
    RDFTContext rdft, irdft;      /* forward / inverse real FFT, set up
                                   * with nbits = 7 */
    DCTContext dct, dst;          /* DCT-I / DST-I, set up with nbits = 6 */
    float sin[511], cos[511];     /* lookup tables indexed 0..510 around a
                                   * centre of 255; sin is odd-symmetric and
                                   * cos even-symmetric about that centre */
    float postfilter_agc;         /* gain memory of adaptive_gain_control() */
    float dcf_mem[2];             /* state of the order-2 filter applied in
                                   * postfilter() when dc_level > 8 */
    float zero_exc_pf[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE];
                                  /* not referenced directly here; presumably
                                   * backs the zero_exc_pf argument of
                                   * postfilter() - TODO confirm */
    float denoise_filter_cache[MAX_FRAMESIZE];
                                  /* filter tail carried over between calls
                                   * of wiener_denoise() */
    int denoise_filter_cache_size;/* number of valid cached samples */
    DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x80];
                                  /* scratch: tilt-compensated LPCs */
    DECLARE_ALIGNED(32, float, denoise_coeffs_pf)[0x80];
                                  /* scratch: denoise filter coefficients */
    DECLARE_ALIGNED(32, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16];
                                  /* postfilter synthesis output; the first
                                   * MAX_LSPS_ALIGN16 entries hold history */
} WMAVoiceContext;
00294
00304 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
00305 {
00306 static const uint8_t bits[] = {
00307 2, 2, 2, 4, 4, 4,
00308 6, 6, 6, 8, 8, 8,
00309 10, 10, 10, 12, 12, 12,
00310 14, 14, 14, 14
00311 };
00312 static const uint16_t codes[] = {
00313 0x0000, 0x0001, 0x0002,
00314 0x000c, 0x000d, 0x000e,
00315 0x003c, 0x003d, 0x003e,
00316 0x00fc, 0x00fd, 0x00fe,
00317 0x03fc, 0x03fd, 0x03fe,
00318 0x0ffc, 0x0ffd, 0x0ffe,
00319 0x3ffc, 0x3ffd, 0x3ffe, 0x3fff
00320 };
00321 int cntr[8], n, res;
00322
00323 memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25);
00324 memset(cntr, 0, sizeof(cntr));
00325 for (n = 0; n < 17; n++) {
00326 res = get_bits(gb, 3);
00327 if (cntr[res] > 3)
00328 return -1;
00329 vbm_tree[res * 3 + cntr[res]++] = n;
00330 }
00331 INIT_VLC_STATIC(&frame_type_vlc, VLC_NBITS, sizeof(bits),
00332 bits, 1, 1, codes, 2, 2, 132);
00333 return 0;
00334 }
00335
00339 static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
00340 {
00341 int n, flags, pitch_range, lsp16_flag;
00342 WMAVoiceContext *s = ctx->priv_data;
00343
00352 if (ctx->extradata_size != 46) {
00353 av_log(ctx, AV_LOG_ERROR,
00354 "Invalid extradata size %d (should be 46)\n",
00355 ctx->extradata_size);
00356 return -1;
00357 }
00358 flags = AV_RL32(ctx->extradata + 18);
00359 s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
00360 s->do_apf = flags & 0x1;
00361 if (s->do_apf) {
00362 ff_rdft_init(&s->rdft, 7, DFT_R2C);
00363 ff_rdft_init(&s->irdft, 7, IDFT_C2R);
00364 ff_dct_init(&s->dct, 6, DCT_I);
00365 ff_dct_init(&s->dst, 6, DST_I);
00366
00367 ff_sine_window_init(s->cos, 256);
00368 memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
00369 for (n = 0; n < 255; n++) {
00370 s->sin[n] = -s->sin[510 - n];
00371 s->cos[510 - n] = s->cos[n];
00372 }
00373 }
00374 s->denoise_strength = (flags >> 2) & 0xF;
00375 if (s->denoise_strength >= 12) {
00376 av_log(ctx, AV_LOG_ERROR,
00377 "Invalid denoise filter strength %d (max=11)\n",
00378 s->denoise_strength);
00379 return -1;
00380 }
00381 s->denoise_tilt_corr = !!(flags & 0x40);
00382 s->dc_level = (flags >> 7) & 0xF;
00383 s->lsp_q_mode = !!(flags & 0x2000);
00384 s->lsp_def_mode = !!(flags & 0x4000);
00385 lsp16_flag = flags & 0x1000;
00386 if (lsp16_flag) {
00387 s->lsps = 16;
00388 s->frame_lsp_bitsize = 34;
00389 s->sframe_lsp_bitsize = 60;
00390 } else {
00391 s->lsps = 10;
00392 s->frame_lsp_bitsize = 24;
00393 s->sframe_lsp_bitsize = 48;
00394 }
00395 for (n = 0; n < s->lsps; n++)
00396 s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
00397
00398 init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
00399 if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
00400 av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
00401 return -1;
00402 }
00403
00404 s->min_pitch_val = ((ctx->sample_rate << 8) / 400 + 50) >> 8;
00405 s->max_pitch_val = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
00406 pitch_range = s->max_pitch_val - s->min_pitch_val;
00407 if (pitch_range <= 0) {
00408 av_log(ctx, AV_LOG_ERROR, "Invalid pitch range; broken extradata?\n");
00409 return -1;
00410 }
00411 s->pitch_nbits = av_ceil_log2(pitch_range);
00412 s->last_pitch_val = 40;
00413 s->last_acb_type = ACB_TYPE_NONE;
00414 s->history_nsamples = s->max_pitch_val + 8;
00415
00416 if (s->min_pitch_val < 1 || s->history_nsamples > MAX_SIGNAL_HISTORY) {
00417 int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
00418 max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
00419
00420 av_log(ctx, AV_LOG_ERROR,
00421 "Unsupported samplerate %d (min=%d, max=%d)\n",
00422 ctx->sample_rate, min_sr, max_sr);
00423
00424 return -1;
00425 }
00426
00427 s->block_conv_table[0] = s->min_pitch_val;
00428 s->block_conv_table[1] = (pitch_range * 25) >> 6;
00429 s->block_conv_table[2] = (pitch_range * 44) >> 6;
00430 s->block_conv_table[3] = s->max_pitch_val - 1;
00431 s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
00432 if (s->block_delta_pitch_hrange <= 0) {
00433 av_log(ctx, AV_LOG_ERROR, "Invalid delta pitch hrange; broken extradata?\n");
00434 return -1;
00435 }
00436 s->block_delta_pitch_nbits = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
00437 s->block_pitch_range = s->block_conv_table[2] +
00438 s->block_conv_table[3] + 1 +
00439 2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
00440 s->block_pitch_nbits = av_ceil_log2(s->block_pitch_range);
00441
00442 ctx->sample_fmt = AV_SAMPLE_FMT_FLT;
00443
00444 avcodec_get_frame_defaults(&s->frame);
00445 ctx->coded_frame = &s->frame;
00446
00447 return 0;
00448 }
00449
/**
 * Adaptive gain control: scale the post-filtered signal so that its level
 * follows the level of the unfiltered synthesised speech.
 *
 * Both levels are measured as the sum of absolute sample values. The gain
 * applied to each sample is a one-pole smoothed version of
 * speech_level / postfilter_level, with smoothing factor alpha.
 *
 * If the post-filtered input is completely silent, the target gain is
 * taken to be zero. This avoids a division by zero that would otherwise
 * write NaN to the output and inf to the persistent gain memory.
 *
 * @param out          output samples; may alias in
 * @param in           post-filtered input samples
 * @param speech_synth reference (unfiltered) speech samples
 * @param size         number of samples in each buffer
 * @param alpha        smoothing factor of the gain filter
 * @param gain_mem     gain filter state; read on entry, updated on return
 */
static void adaptive_gain_control(float *out, const float *in,
                                  const float *speech_synth,
                                  int size, float alpha, float *gain_mem)
{
    int i;
    float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
    float mem = *gain_mem;

    for (i = 0; i < size; i++) {
        speech_energy     += fabsf(speech_synth[i]);
        postfilter_energy += fabsf(in[i]);
    }
    /* sums of absolute values are never negative, so == 0 is the only
     * case in which the ratio is undefined */
    gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :
                        (1.0 - alpha) * speech_energy / postfilter_energy;

    for (i = 0; i < size; i++) {
        mem    = alpha * mem + gain_scale_factor;
        out[i] = in[i] * mem;
    }

    *gain_mem = mem;
}
00492
00511 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
00512 const float *in, float *out, int size)
00513 {
00514 int n;
00515 float optimal_gain = 0, dot;
00516 const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
00517 *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
00518 *best_hist_ptr;
00519
00520
00521 do {
00522 dot = ff_dot_productf(in, ptr, size);
00523 if (dot > optimal_gain) {
00524 optimal_gain = dot;
00525 best_hist_ptr = ptr;
00526 }
00527 } while (--ptr >= end);
00528
00529 if (optimal_gain <= 0)
00530 return -1;
00531 dot = ff_dot_productf(best_hist_ptr, best_hist_ptr, size);
00532 if (dot <= 0)
00533 return -1;
00534
00535 if (optimal_gain <= dot) {
00536 dot = dot / (dot + 0.6 * optimal_gain);
00537 } else
00538 dot = 0.625;
00539
00540
00541 for (n = 0; n < size; n++)
00542 out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
00543
00544 return 0;
00545 }
00546
/**
 * Compute the ratio between the lag-1 and lag-0 autocorrelation of the
 * sequence { 1, lpcs[0], ..., lpcs[n_lpcs - 1] }.
 *
 * @param lpcs   coefficients (without the implicit leading 1)
 * @param n_lpcs number of coefficients
 * @return lag-1 autocorrelation divided by lag-0 autocorrelation
 */
static float tilt_factor(const float *lpcs, int n_lpcs)
{
    /* lag 0: the implicit leading 1 contributes 1 * 1 */
    const float corr0 = 1.0 + ff_dot_productf(lpcs, lpcs, n_lpcs);
    /* lag 1: the implicit leading 1 pairs with lpcs[0] */
    const float corr1 = lpcs[0] + ff_dot_productf(lpcs, lpcs + 1, n_lpcs - 1);

    return corr1 / corr0;
}
00566
/**
 * Derive the time-domain denoise filter coefficients from the
 * (tilt-compensated) LPCs.
 *
 * Steps, all on 128-point real-FFT data:
 *  1. transform lpcs and replace every bin by the log10 of its power,
 *     tracking the minimum and maximum;
 *  2. map each log power, relative to the maximum, through
 *     wmavoice_denoise_power_table and wmavoice_energy_table to get a
 *     per-bin magnitude (coeffs[]) and a scaled value kept in lpcs[];
 *  3. run DCT-I and DST-I over lpcs[] and use the results as indices
 *     into the sin/cos tables to attach a phase to each magnitude;
 *  4. inverse-transform, truncate to remainder taps, optionally
 *     tilt-compensate, and normalise.
 *
 * @param s         decoder context (transforms, tables, denoise settings)
 * @param lpcs      128 floats; input LPCs, destroyed (used as scratch)
 * @param fcb_type  fixed codebook type of the block; selects the gain scale
 * @param coeffs    128 floats; receives the filter, only the first
 *                  remainder entries are non-zero on return
 * @param remainder number of filter taps to keep
 */
static void calc_input_response(WMAVoiceContext *s, float *lpcs,
                                int fcb_type, float *coeffs, int remainder)
{
    float last_coeff, min = 15.0, max = -15.0;
    float irange, angle_mul, gain_mul, range, sq;
    int n, idx;

    /* Step 1: spectrum of the LPCs, converted in place to log10(power).
     * Bins n = 1..63 are read as (re, im) pairs at lpcs[2n], lpcs[2n+1];
     * lpcs[0] and lpcs[1] are treated as two real-only bins (presumably
     * DC and Nyquist in the packed RDFT layout - TODO confirm). Writing
     * lpcs[n] never clobbers a pair that is still to be read because
     * n < 2n for n >= 1; lpcs[0] is handled last for the same reason. */
    s->rdft.rdft_calc(&s->rdft, lpcs);
#define log_range(var, assign) do { \
        float tmp = log10f(assign); var = tmp; \
        max       = FFMAX(max, tmp); min = FFMIN(min, tmp); \
    } while (0)
    log_range(last_coeff,  lpcs[1]         * lpcs[1]);
    for (n = 1; n < 64; n++)
        log_range(lpcs[n], lpcs[n * 2]     * lpcs[n * 2] +
                           lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
    log_range(lpcs[0],     lpcs[0]         * lpcs[0]);
#undef log_range
    range    = max - min;
    lpcs[64] = last_coeff;

    /* Step 2: quantise each bin's distance from the maximum to a table
     * index (irange maps the full range onto 64 steps), look up the
     * denoise power, and turn it into a magnitude via the energy table.
     * Indices past the end of the energy table are extrapolated
     * geometrically. */
    irange    = 64.0 / range;
    gain_mul  = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
                                                          (5.0 / 14.7));
    angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
    for (n = 0; n <= 64; n++) {
        float pwr;

        idx = FFMAX(0, lrint((max - lpcs[n]) * irange) - 1);
        pwr = wmavoice_denoise_power_table[s->denoise_strength][idx];
        lpcs[n] = angle_mul * pwr;

        /* 70.570526123 calculated using av_log2(1.0331663) */
        idx = (pwr * gain_mul - 0.0295) * 70.570526123;
        if (idx > 127) { // fallback if index falls outside table range
            coeffs[n] = wmavoice_energy_table[127] *
                        powf(1.0331663, idx - 127);
        } else
            coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
    }

    /* Step 3: both transforms operate in place on lpcs[]; their outputs
     * are used below as (clipped) offsets into the sin/cos tables. */
    s->dct.dct_calc(&s->dct, lpcs);
    s->dst.dct_calc(&s->dst, lpcs);

    /* Attach phase: table index 255 is the centre of the sin/cos tables,
     * offsets are clipped to +-255. The loop handles two bins per
     * iteration (odd n first, then n - 1) with opposite signs on
     * lpcs[64], and terminates when n reaches 0. coeffs[] is filled from
     * the top down so that coeffs[n] is still a magnitude when read. */
    idx = 255 + av_clip(lpcs[64],               -255, 255);
    coeffs[0]  = coeffs[0]  * s->cos[idx];
    idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
    last_coeff = coeffs[64] * s->cos[idx];
    for (n = 63;; n--) {
        idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
        coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
        coeffs[n * 2]     = coeffs[n] * s->cos[idx];

        if (!--n) break;

        idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
        coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
        coeffs[n * 2]     = coeffs[n] * s->cos[idx];
    }
    coeffs[1] = last_coeff;

    /* Step 4: back to the time domain */
    s->irdft.rdft_calc(&s->irdft, coeffs);

    /* keep only the first remainder taps */
    memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128 - remainder));
    if (s->denoise_tilt_corr) {
        float tilt_mem = 0;

        coeffs[remainder - 1] = 0;
        ff_tilt_compensation(&tilt_mem,
                             -1.8 * tilt_factor(coeffs, remainder - 1),
                             coeffs, remainder);
    }
    /* normalise by 1 / (64 * sqrt(energy)) */
    sq = (1.0 / 64.0) * sqrtf(1 / ff_dot_productf(coeffs, coeffs, remainder));
    for (n = 0; n < remainder; n++)
        coeffs[n] *= sq;
}
00659
/**
 * Apply the denoise filter to one block of synthesised speech, in place.
 *
 * For non-silence blocks a filter is derived from the block's LPCs (see
 * calc_input_response()) and applied by multiplying the 128-point spectra
 * of signal and filter. The part of the filtered result that extends past
 * size samples is kept in denoise_filter_cache and added to the start of
 * the following block(s).
 *
 * @param s        decoder context
 * @param fcb_type fixed codebook type of the block; FCB_TYPE_SILENCE
 *                 blocks are not filtered but still receive cached tails
 * @param synth_pf signal buffer; must have room for 128 floats, the first
 *                 size of which hold the block
 * @param size     number of samples in the block
 * @param lpcs     LPCs of the block (s->lsps values)
 */
static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
                           float *synth_pf, int size,
                           const float *lpcs)
{
    /* remainder is only assigned and only read when
     * fcb_type != FCB_TYPE_SILENCE */
    int remainder, lim, n;

    if (fcb_type != FCB_TYPE_SILENCE) {
        float *tilted_lpcs = s->tilted_lpcs_pf,
              *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;

        /* build { 1, lpcs..., 0... } and tilt-compensate it */
        tilted_lpcs[0]           = 1.0;
        memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
        memset(&tilted_lpcs[s->lsps + 1], 0,
               sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
        ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
                             tilted_lpcs, s->lsps + 2);

        /* Number of filter taps. The filtered result spans
         * size + remainder - 1 samples, which this bound keeps below the
         * 128-point transform length. */
        remainder = FFMIN(127 - size, size - 1);
        calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);

        /* zero-pad the signal, transform both, and multiply the spectra:
         * entries 0 and 1 are multiplied as plain reals, entries 2n/2n+1
         * as complex pairs */
        memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
        s->rdft.rdft_calc(&s->rdft, synth_pf);
        s->rdft.rdft_calc(&s->rdft, coeffs);
        synth_pf[0] *= coeffs[0];
        synth_pf[1] *= coeffs[1];
        for (n = 1; n < 64; n++) {
            float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
            synth_pf[n * 2]     = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
            synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
        }
        s->irdft.rdft_calc(&s->irdft, synth_pf);
    }

    /* add the tail cached from earlier blocks to the start of this one,
     * then drop the consumed part of the cache */
    if (s->denoise_filter_cache_size) {
        lim = FFMIN(s->denoise_filter_cache_size, size);
        for (n = 0; n < lim; n++)
            synth_pf[n] += s->denoise_filter_cache[n];
        s->denoise_filter_cache_size -= lim;
        memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
                sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
    }

    /* store this block's tail: accumulate where the cache still holds
     * data, copy where it does not */
    if (fcb_type != FCB_TYPE_SILENCE) {
        lim = FFMIN(remainder, s->denoise_filter_cache_size);
        for (n = 0; n < lim; n++)
            s->denoise_filter_cache[n] += synth_pf[size + n];
        if (lim < remainder) {
            memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
                   sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
            s->denoise_filter_cache_size = remainder;
        }
    }
}
00747
/**
 * Post-process one block of synthesised speech.
 *
 * The block is inverse-filtered with its LPCs, optionally smoothed against
 * its own history (kalman_smoothen()), re-synthesised, denoised
 * (wiener_denoise()) and gain-corrected (adaptive_gain_control()).
 * When dc_level > 8 a fixed second-order filter is applied last.
 *
 * @param s           decoder context
 * @param synth       synthesised speech for this block (input)
 * @param samples     output samples, size entries
 * @param size        block size; at most MAX_FRAMESIZE / 2
 * @param lpcs        LPCs of the block
 * @param zero_exc_pf receives the inverse-filtered signal; since
 *                    kalman_smoothen() reads before its input pointer,
 *                    earlier samples must be available in front of it
 * @param fcb_type    fixed codebook type of the block
 * @param pitch       pitch of the block, used for the smoothing search
 */
static void postfilter(WMAVoiceContext *s, const float *synth,
                       float *samples,    int size,
                       const float *lpcs, float *zero_exc_pf,
                       int fcb_type,      int pitch)
{
    float synth_filter_in_buf[MAX_FRAMESIZE / 2],
          *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
          *synth_filter_in = zero_exc_pf;

    assert(size <= MAX_FRAMESIZE / 2);

    /* inverse-filter the speech with the block's LPCs */
    ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);

    /* only pulse-coded blocks are smoothed; if smoothing fails the
     * unsmoothed signal is used */
    if (fcb_type >= FCB_TYPE_AW_PULSES &&
        !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
        synth_filter_in = synth_filter_in_buf;

    /* re-synthesise, then move the last s->lsps outputs in front of the
     * buffer so they are available at negative indices on the next call */
    ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
                                 synth_filter_in, size, s->lsps);
    memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
           sizeof(synth_pf[0]) * s->lsps);

    wiener_denoise(s, fcb_type, synth_pf, size, lpcs);

    adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
                          &s->postfilter_agc);

    if (s->dc_level > 8) {
        /* fixed second-order filter, applied in place; its state lives
         * in s->dcf_mem (presumably a DC-removal high-pass - TODO
         * confirm against the coefficients) */
        ff_acelp_apply_order_2_transfer_function(samples, samples,
            (const float[2]) { -1.99997,      1.0 },
            (const float[2]) { -1.9330735188, 0.93589198496 },
            0.93980580475, s->dcf_mem, size);
    }
}
/**
 * Multi-stage vector dequantisation.
 *
 * The result is the sum over all stages of
 * base_q[stage] + mul_q[stage] * entry[k], where entry is the codebook
 * vector selected by values[stage]. The codebooks of consecutive stages
 * are stored back to back in table; stage i occupies sizes[i] * num bytes.
 *
 * @param lsps     output, num values (overwritten)
 * @param num      vector length
 * @param values   selected codebook index for each stage
 * @param sizes    number of vectors in each stage's codebook
 * @param n_stages number of stages
 * @param table    concatenated 8-bit codebooks
 * @param mul_q    per-stage scale factor
 * @param base_q   per-stage offset
 */
static void dequant_lsps(double *lsps, int num,
                         const uint16_t *values,
                         const uint16_t *sizes,
                         int n_stages, const uint8_t *table,
                         const double *mul_q,
                         const double *base_q)
{
    const uint8_t *codebook = table;   /* start of the current stage */
    int stage, k;

    for (k = 0; k < num; k++)
        lsps[k] = 0.0;

    for (stage = 0; stage < n_stages; stage++) {
        const uint8_t *entry = codebook + values[stage] * num;
        const double offset  = base_q[stage];
        const double scale   = mul_q[stage];

        for (k = 0; k < num; k++)
            lsps[k] += offset + scale * entry[k];

        /* skip over this stage's codebook */
        codebook += sizes[stage] * num;
    }
}
00842
00854 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
00855 {
00856 static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
00857 static const double mul_lsf[4] = {
00858 5.2187144800e-3, 1.4626986422e-3,
00859 9.6179549166e-4, 1.1325736225e-3
00860 };
00861 static const double base_lsf[4] = {
00862 M_PI * -2.15522e-1, M_PI * -6.1646e-2,
00863 M_PI * -3.3486e-2, M_PI * -5.7408e-2
00864 };
00865 uint16_t v[4];
00866
00867 v[0] = get_bits(gb, 8);
00868 v[1] = get_bits(gb, 6);
00869 v[2] = get_bits(gb, 5);
00870 v[3] = get_bits(gb, 5);
00871
00872 dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
00873 mul_lsf, base_lsf);
00874 }
00875
00880 static void dequant_lsp10r(GetBitContext *gb,
00881 double *i_lsps, const double *old,
00882 double *a1, double *a2, int q_mode)
00883 {
00884 static const uint16_t vec_sizes[3] = { 128, 64, 64 };
00885 static const double mul_lsf[3] = {
00886 2.5807601174e-3, 1.2354460219e-3, 1.1763821673e-3
00887 };
00888 static const double base_lsf[3] = {
00889 M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
00890 };
00891 const float (*ipol_tab)[2][10] = q_mode ?
00892 wmavoice_lsp10_intercoeff_b : wmavoice_lsp10_intercoeff_a;
00893 uint16_t interpol, v[3];
00894 int n;
00895
00896 dequant_lsp10i(gb, i_lsps);
00897
00898 interpol = get_bits(gb, 5);
00899 v[0] = get_bits(gb, 7);
00900 v[1] = get_bits(gb, 6);
00901 v[2] = get_bits(gb, 6);
00902
00903 for (n = 0; n < 10; n++) {
00904 double delta = old[n] - i_lsps[n];
00905 a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00906 a1[10 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00907 }
00908
00909 dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
00910 mul_lsf, base_lsf);
00911 }
00912
00916 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
00917 {
00918 static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
00919 static const double mul_lsf[5] = {
00920 3.3439586280e-3, 6.9908173703e-4,
00921 3.3216608306e-3, 1.0334960326e-3,
00922 3.1899104283e-3
00923 };
00924 static const double base_lsf[5] = {
00925 M_PI * -1.27576e-1, M_PI * -2.4292e-2,
00926 M_PI * -1.28094e-1, M_PI * -3.2128e-2,
00927 M_PI * -1.29816e-1
00928 };
00929 uint16_t v[5];
00930
00931 v[0] = get_bits(gb, 8);
00932 v[1] = get_bits(gb, 6);
00933 v[2] = get_bits(gb, 7);
00934 v[3] = get_bits(gb, 6);
00935 v[4] = get_bits(gb, 7);
00936
00937 dequant_lsps( lsps, 5, v, vec_sizes, 2,
00938 wmavoice_dq_lsp16i1, mul_lsf, base_lsf);
00939 dequant_lsps(&lsps[5], 5, &v[2], &vec_sizes[2], 2,
00940 wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
00941 dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
00942 wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
00943 }
00944
00949 static void dequant_lsp16r(GetBitContext *gb,
00950 double *i_lsps, const double *old,
00951 double *a1, double *a2, int q_mode)
00952 {
00953 static const uint16_t vec_sizes[3] = { 128, 128, 128 };
00954 static const double mul_lsf[3] = {
00955 1.2232979501e-3, 1.4062241527e-3, 1.6114744851e-3
00956 };
00957 static const double base_lsf[3] = {
00958 M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
00959 };
00960 const float (*ipol_tab)[2][16] = q_mode ?
00961 wmavoice_lsp16_intercoeff_b : wmavoice_lsp16_intercoeff_a;
00962 uint16_t interpol, v[3];
00963 int n;
00964
00965 dequant_lsp16i(gb, i_lsps);
00966
00967 interpol = get_bits(gb, 5);
00968 v[0] = get_bits(gb, 7);
00969 v[1] = get_bits(gb, 7);
00970 v[2] = get_bits(gb, 7);
00971
00972 for (n = 0; n < 16; n++) {
00973 double delta = old[n] - i_lsps[n];
00974 a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00975 a1[16 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00976 }
00977
00978 dequant_lsps( a2, 10, v, vec_sizes, 1,
00979 wmavoice_dq_lsp16r1, mul_lsf, base_lsf);
00980 dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
00981 wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
00982 dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
00983 wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
00984 }
00985
/**
 * Parse the pulse-position header of an FCB_TYPE_AW_PULSES frame and
 * derive, for each of its two blocks, the number of pulses and the offset
 * of the first one.
 *
 * Results are stored in s->aw_idx_is_ext, s->aw_pulse_range,
 * s->aw_n_pulses[] and s->aw_first_pulse_off[].
 *
 * @param s     decoder context (receives the results)
 * @param gb    bit reader
 * @param pitch pitch lag of block 0 and block 1; both must be positive
 *              (they are used as loop increments and divisors)
 */
static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb,
                            const int *pitch)
{
    /* start position for every index value 0..93 */
    static const int16_t start_offset[94] = {
        -11,  -9,  -7,  -5,  -3,  -1,   1,   3,   5,   7,   9,  11,
         13,  15,  18,  17,  19,  20,  21,  22,  23,  24,  25,  26,
         27,  28,  29,  30,  31,  32,  33,  35,  37,  39,  41,  43,
         45,  47,  49,  51,  53,  55,  57,  59,  61,  63,  65,  67,
         69,  71,  73,  75,  77,  79,  81,  83,  85,  87,  89,  91,
         93,  95,  97,  99, 101, 103, 105, 107, 109, 111, 113, 115,
        117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
        141, 143, 145, 147, 149, 151, 153, 155, 157, 159
    };
    int bits, offset;

    /* Index: 6 bits; values 54..63 are escapes that are expanded with two
     * more bits to 54 + (bits - 54) * 4 + extra, i.e. at most 93. */
    s->aw_idx_is_ext = 0;
    if ((bits = get_bits(gb, 6)) >= 54) {
        s->aw_idx_is_ext = 1;
        bits += (bits - 54) * 3 + get_bits(gb, 2);
    }

    /* Walk forward from the start offset in steps of the pitch until the
     * position is non-negative, then count how many pulses spaced one
     * pitch apart fit into each half of the frame. The first-pulse
     * offsets are made relative to the block and shifted back by half
     * the pulse range. */
    s->aw_pulse_range        = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
    for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
    s->aw_n_pulses[0]        = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
    s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
    offset                  += s->aw_n_pulses[0] * pitch[0];
    s->aw_n_pulses[1]        = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
    s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;

    /* If the sequence starts in the first block, move the first-pulse
     * offsets back by whole pitch periods for as long as the pulse range
     * of the earlier position still reaches into the block. */
    if (start_offset[bits] < MAX_FRAMESIZE / 2) {
        while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
            s->aw_first_pulse_off[1] -= pitch[1];
        if (start_offset[bits] < 0)
            while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
                s->aw_first_pulse_off[0] -= pitch[0];
    }
}
01042
/**
 * Decode the single extra pulse of an FCB_TYPE_AW_PULSES block. It is
 * placed on a position that is not covered by the pulse windows of
 * aw_pulse_set1().
 *
 * The bitstream carries the ordinal of the pulse among the free
 * positions, plus a sign bit. One pulse is appended to fcb, unless no
 * free position exists at all, in which case the function returns early
 * without reading the sign bit.
 *
 * @param s         decoder context; aw_* fields must have been set by
 *                  aw_parse_coords(). aw_next_pulse_off_cache is updated.
 * @param gb        bit reader
 * @param block_idx 0 or 1
 * @param fcb       pulse list to append to; fcb->pitch_lag must be
 *                  positive (it is used as loop increment and divisor)
 */
static void aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb,
                          int block_idx, AMRFixed *fcb)
{
    /* Bitmask of free positions: one bit per sample for the 80 samples of
     * the block, most significant bit first, 16 samples per word
     * (use_mask[0..4]). Two guard words before and two after absorb
     * accesses for indices slightly outside the block. */
    uint16_t use_mask_mem[9];
    uint16_t *use_mask = use_mask_mem + 2;
    int pulse_off = s->aw_first_pulse_off[block_idx],
        pulse_start, n, idx, range, aidx, start_off = 0;

    /* advance to the first pulse window that reaches into this block */
    if (s->aw_n_pulses[block_idx] > 0)
        while (pulse_off + s->aw_pulse_range < 1)
            pulse_off += fcb->pitch_lag;

    /* choose the search range; for the second block the start position
     * is the one remembered from the first block */
    if (s->aw_n_pulses[0] > 0) {
        if (block_idx == 0) {
            range = 32;
        } else /* block_idx = 1 */ {
            range = 8;
            if (s->aw_n_pulses[block_idx] > 0)
                pulse_off = s->aw_next_pulse_off_cache;
        }
    } else
        range = 16;
    pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;

    /* Mark all 80 positions free (guards cleared), then clear
     * aw_pulse_range bits starting at every pulse window position. A
     * window of up to 24 bits can straddle up to three words. */
    memset(&use_mask[-2], 0, 2 * sizeof(use_mask[0]));
    memset( use_mask,   -1, 5 * sizeof(use_mask[0]));
    memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
    if (s->aw_n_pulses[block_idx] > 0)
        for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
            int excl_range         = s->aw_pulse_range;
            uint16_t *use_mask_ptr = &use_mask[idx >> 4];
            int first_sh           = 16 - (idx & 15);
            *use_mask_ptr++       &= 0xFFFFu << first_sh;
            excl_range            -= first_sh;
            if (excl_range >= 16) {
                *use_mask_ptr++    = 0;
                *use_mask_ptr     &= 0xFFFF >> (excl_range - 16);
            } else
                *use_mask_ptr     &= 0xFFFF >> excl_range;
        }

    /* Read the ordinal and walk positions from pulse_start, counting
     * free ones, until aidx + 1 of them have been consumed. Negative
     * positions are wrapped forward by the pitch lag; once the walk runs
     * past the end of the block it falls back to the first free position
     * in the mask, or gives up if none is left. */
    aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
    for (n = 0; n <= aidx; pulse_start++) {
        for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
        if (idx >= MAX_FRAMESIZE / 2) { // find from zero
            if (use_mask[0])      idx = 0x0F;
            else if (use_mask[1]) idx = 0x1F;
            else if (use_mask[2]) idx = 0x2F;
            else if (use_mask[3]) idx = 0x3F;
            else if (use_mask[4]) idx = 0x4F;
            else                  return;
            idx -= av_log2_16bit(use_mask[idx >> 4]);
        }
        if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
            use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
            n++;
            start_off = idx;
        }
    }

    fcb->x[fcb->n] = start_off;
    fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
    fcb->n++;

    /* remember where the pitch-periodic continuation of this pulse falls
     * in the next block */
    n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
    s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
}
01131
/**
 * Decode the main pulse set of an FCB_TYPE_AW_PULSES block.
 *
 * A single value of 12 bits is read (10 bits for block 0 when the
 * extended index was used). If the block has pulse windows, it holds
 * three 4-bit or four 3-bit fields, each a sign bit plus a position
 * inside the window. Otherwise it encodes one pair of pulses at a
 * distance of 1, 3, 5 or 7 samples.
 *
 * @param s         decoder context; aw_* fields must have been set by
 *                  aw_parse_coords()
 * @param gb        bit reader
 * @param block_idx 0 or 1
 * @param fcb       pulse list to append to
 */
static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb,
                          int block_idx, AMRFixed *fcb)
{
    int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
    float v;

    if (s->aw_n_pulses[block_idx] > 0) {
        int n, v_mask, i_mask, sh, n_pulses;

        if (s->aw_pulse_range == 24) { // 3 pulses, 1:sign + 3:index each
            n_pulses = 3;
            v_mask   = 8;
            i_mask   = 7;
            sh       = 4;
        } else { // 4 pulses, 1:sign + 2:index each
            n_pulses = 4;
            v_mask   = 4;
            i_mask   = 3;
            sh       = 3;
        }

        /* fields are stored last pulse first; pulse n sits on track n
         * (positions n, n + n_pulses, ...) of the window */
        for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
            fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
            fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
                                 s->aw_first_pulse_off[block_idx];
            /* wrap positions before the block forward by the pitch lag */
            while (fcb->x[fcb->n] < 0)
                fcb->x[fcb->n] += fcb->pitch_lag;
            /* pulses past the end of the block are dropped */
            if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)
                fcb->n++;
        }
    } else {
        /* bit 9: sign of the first pulse; bits 1..8: position/distance
         * code; bit 0: set if the second pulse has the opposite sign */
        int num2 = (val & 0x1FF) >> 1, delta, idx;

        if (num2 < 1 * 79)      { delta = 1; idx = num2 + 1; }
        else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
        else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
        else                    { delta = 7; idx = num2 + 1 - 3 * 75; }
        v = (val & 0x200) ? -1.0 : 1.0;

        /* both pulses of the pair are flagged in no_repeat_mask */
        fcb->no_repeat_mask |= 3 << fcb->n;
        fcb->x[fcb->n]       = idx - delta;
        fcb->y[fcb->n]       = v;
        fcb->x[fcb->n + 1]   = idx;
        fcb->y[fcb->n + 1]   = (val & 1) ? -v : v;
        fcb->n              += 2;
    }
}
01186
/**
 * Deterministic pseudo-random offset for silence blocks.
 *
 * A seed is formed from the frame counter and the block number, reduced
 * to the range below 0xFFFF, and hashed with one of nine multiplier pairs
 * chosen by the seed. The hash is truncated to 16 bits and reduced modulo
 * (1000 - block_size).
 *
 * @param frame_cntr frame counter
 * @param block_num  index of the block within the frame
 * @param block_size block size in samples; must be below 1000
 * @return a value in [0, 1000 - block_size)
 */
static int pRNG(int frame_cntr, int block_num, int block_size)
{
    /* one { multiplier, high-part multiplier } pair per selector value */
    static const unsigned int div_tbl[9][2] = {
        { 8332,  3 * 715827883U }, // y =  2
        { 4545,  0 * 390451573U }, // y =  3
        { 3124, 11 * 268435456U }, // y =  4
        { 2380, 15 * 204522253U }, // y =  5
        { 1922, 23 * 165191050U }, // y =  6
        { 1612, 23 * 138547333U }, // y =  7
        { 1388, 27 * 119304648U }, // y =  8
        { 1219, 16 * 104755300U }, // y =  9
        { 1086, 39 *  93368855U }  // y = 10
    };
    unsigned int seed = MUL16(block_num, 1877) + frame_cntr;
    unsigned int sel, hash;

    if (seed >= 0xFFFF)
        seed -= 0xFFFF;

    /* 477218589 is roughly 2^32 / 9, so - assuming MULH yields the upper
     * 32 bits of the product - this is seed modulo 9 */
    sel  = seed - 9 * MULH(477218589, seed);
    hash = (uint16_t) (seed * div_tbl[sel][0] + UMULH(seed, div_tbl[sel][1]));

    return hash % (1000 - block_size);
}
01231
01236 static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb,
01237 int block_idx, int size,
01238 const struct frame_type_desc *frame_desc,
01239 float *excitation)
01240 {
01241 float gain;
01242 int n, r_idx;
01243
01244 assert(size <= MAX_FRAMESIZE);
01245
01246
01247 if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01248 r_idx = pRNG(s->frame_cntr, block_idx, size);
01249 gain = s->silence_gain;
01250 } else {
01251 r_idx = get_bits(gb, 8);
01252 gain = wmavoice_gain_universal[get_bits(gb, 6)];
01253 }
01254
01255
01256 memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
01257
01258
01259 for (n = 0; n < size; n++)
01260 excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
01261 }
01262
/**
 * Generate the excitation of a block that uses both a fixed (pulse)
 * codebook and an adaptive (pitch) codebook.
 *
 * Bitstream order: pulse data, then a 7-bit joint gain index. The
 * adaptive contribution is built from the excitation history located
 * before excitation[0], and the result is
 * acb_gain * adaptive + fcb_gain * pulses.
 *
 * @param s               decoder context; gain_pred_err is updated
 * @param gb              bit reader
 * @param block_idx       index of the block within the frame
 * @param size            block size in samples, at most MAX_FRAMESIZE / 2
 * @param block_pitch_sh2 block pitch in quarter samples (integer part in
 *                        the upper bits, fraction in the low two bits)
 * @param frame_desc      description of the current frame type
 * @param excitation      output, size samples; samples at negative
 *                        indices (at least one pitch period plus
 *                        interpolation margin) are read as history
 */
static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,
                                int block_idx, int size,
                                int block_pitch_sh2,
                                const struct frame_type_desc *frame_desc,
                                float *excitation)
{
    /* weights applied to the gain prediction error history */
    static const float gain_coeff[6] = {
        0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
    };
    float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
    int n, idx, gain_weight;
    AMRFixed fcb;

    assert(size <= MAX_FRAMESIZE / 2);
    memset(pulses, 0, sizeof(*pulses) * size);

    fcb.pitch_lag      = block_pitch_sh2 >> 2;
    fcb.pitch_fac      = 1.0;
    fcb.no_repeat_mask = 0;
    fcb.n              = 0;

    /* Fixed codebook: either the adaptive-window pulse sets, or five
     * interleaved tracks (position = track + 5 * index) of which the
     * first dbl_pulses carry a second pulse. */
    if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
        aw_pulse_set1(s, gb, block_idx, &fcb);
        aw_pulse_set2(s, gb, block_idx, &fcb);
    } else /* FCB_TYPE_EXC_PULSES */ {
        int offset_nbits = 5 - frame_desc->log_n_blocks;

        fcb.no_repeat_mask = -1;
        for (n = 0; n < 5; n++) {
            float sign;
            int pos1, pos2;

            sign           = get_bits1(gb) ? 1.0 : -1.0;
            pos1           = get_bits(gb, offset_nbits);
            fcb.x[fcb.n]   = n + 5 * pos1;
            fcb.y[fcb.n++] = sign;
            if (n < frame_desc->dbl_pulses) {
                /* the second pulse has no sign bit of its own: its sign
                 * is flipped when it lies after the first pulse */
                pos2           = get_bits(gb, offset_nbits);
                fcb.x[fcb.n]   = n + 5 * pos2;
                fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
            }
        }
    }
    ff_set_fixed_vector(pulses, &fcb, 1.0, size);

    /* Gains: one index selects both gains. The fixed codebook gain is
     * the exponential of (predicted value from the error history, minus
     * a constant, plus the codebook entry); the clipped codebook entry
     * becomes the new prediction error. */
    idx = get_bits(gb, 7);
    fcb_gain = expf(ff_dot_productf(s->gain_pred_err, gain_coeff, 6) -
                    5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
    acb_gain = wmavoice_gain_codebook_acb[idx];
    pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
                        -2.9957322736 /* log(0.05) */,
                         1.6094379124 /* log(5.0)  */);

    /* shift the history by gain_weight entries (8, 4, 2 or 1 depending
     * on the number of blocks) and fill the front with the new error */
    gain_weight = 8 >> frame_desc->log_n_blocks;
    memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
            sizeof(*s->gain_pred_err) * (6 - gain_weight));
    for (n = 0; n < gain_weight; n++)
        s->gain_pred_err[n] = pred_err;

    /* Adaptive codebook */
    if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
        /* The pitch drifts linearly over the frame (16.16 fixed point).
         * The block is processed in runs of len samples over which the
         * integer interpolation index stays the same. */
        int len;
        for (n = 0; n < size; n += len) {
            int next_idx_sh16;
            int abs_idx    = block_idx * size + n;
            int pitch_sh16 = (s->last_pitch_val << 16) +
                             s->pitch_diff_sh16 * abs_idx;
            int pitch      = (pitch_sh16 + 0x6FFF) >> 16;
            int idx_sh16   = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
            idx            = idx_sh16 >> 16;
            if (s->pitch_diff_sh16) {
                /* number of samples until idx_sh16 crosses into the next
                 * integer index, clamped to [1, samples left] */
                if (s->pitch_diff_sh16 > 0) {
                    next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
                } else
                    next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
                len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
                              1, size - n);
            } else
                len = size;

            ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
                                  wmavoice_ipol1_coeffs, 17,
                                  idx, 9, len);
        }
    } else /* ACB_TYPE_HAMMING */ {
        /* constant pitch over the block; a zero fraction degenerates to
         * a (possibly overlapping) copy from one pitch period back */
        int block_pitch = block_pitch_sh2 >> 2;
        idx             = block_pitch_sh2 & 3;
        if (idx) {
            ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
                                  wmavoice_ipol2_coeffs, 4,
                                  idx, 8, size);
        } else
            av_memcpy_backptr((uint8_t *) excitation, sizeof(float) * block_pitch,
                              sizeof(float) * size);
    }

    /* mix adaptive and fixed codebook contributions */
    ff_weighted_vector_sumf(excitation, excitation, pulses,
                            acb_gain, fcb_gain, size);
}
01373
01390 static void synth_block(WMAVoiceContext *s, GetBitContext *gb,
01391 int block_idx, int size,
01392 int block_pitch_sh2,
01393 const double *lsps, const double *prev_lsps,
01394 const struct frame_type_desc *frame_desc,
01395 float *excitation, float *synth)
01396 {
01397 double i_lsps[MAX_LSPS];
01398 float lpcs[MAX_LSPS];
01399 float fac;
01400 int n;
01401
01402 if (frame_desc->acb_type == ACB_TYPE_NONE)
01403 synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
01404 else
01405 synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
01406 frame_desc, excitation);
01407
01408
01409 fac = (block_idx + 0.5) / frame_desc->n_blocks;
01410 for (n = 0; n < s->lsps; n++)
01411 i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
01412 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01413
01414
01415 ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
01416 }
01417
01433 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
01434 float *samples,
01435 const double *lsps, const double *prev_lsps,
01436 float *excitation, float *synth)
01437 {
01438 WMAVoiceContext *s = ctx->priv_data;
01439 int n, n_blocks_x2, log_n_blocks_x2, cur_pitch_val;
01440 int pitch[MAX_BLOCKS], last_block_pitch;
01441
01442
01443 int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)],
01444 block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
01445
01446 if (bd_idx < 0) {
01447 av_log(ctx, AV_LOG_ERROR,
01448 "Invalid frame type VLC code, skipping\n");
01449 return -1;
01450 }
01451
01452
01453 if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
01454
01455
01456
01457
01458 n_blocks_x2 = frame_descs[bd_idx].n_blocks << 1;
01459 log_n_blocks_x2 = frame_descs[bd_idx].log_n_blocks + 1;
01460 cur_pitch_val = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
01461 cur_pitch_val = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
01462 if (s->last_acb_type == ACB_TYPE_NONE ||
01463 20 * abs(cur_pitch_val - s->last_pitch_val) >
01464 (cur_pitch_val + s->last_pitch_val))
01465 s->last_pitch_val = cur_pitch_val;
01466
01467
01468 for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01469 int fac = n * 2 + 1;
01470
01471 pitch[n] = (MUL16(fac, cur_pitch_val) +
01472 MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
01473 frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
01474 }
01475
01476
01477 s->pitch_diff_sh16 =
01478 ((cur_pitch_val - s->last_pitch_val) << 16) / MAX_FRAMESIZE;
01479 }
01480
01481
01482 switch (frame_descs[bd_idx].fcb_type) {
01483 case FCB_TYPE_SILENCE:
01484 s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
01485 break;
01486 case FCB_TYPE_AW_PULSES:
01487 aw_parse_coords(s, gb, pitch);
01488 break;
01489 }
01490
01491 for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01492 int bl_pitch_sh2;
01493
01494
01495 switch (frame_descs[bd_idx].acb_type) {
01496 case ACB_TYPE_HAMMING: {
01497
01498
01499
01500
01501
01502 int block_pitch,
01503 t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
01504 t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
01505 t3 = s->block_conv_table[3] - s->block_conv_table[2] + 1;
01506
01507 if (n == 0) {
01508 block_pitch = get_bits(gb, s->block_pitch_nbits);
01509 } else
01510 block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
01511 get_bits(gb, s->block_delta_pitch_nbits);
01512
01513 last_block_pitch = av_clip(block_pitch,
01514 s->block_delta_pitch_hrange,
01515 s->block_pitch_range -
01516 s->block_delta_pitch_hrange);
01517
01518
01519 if (block_pitch < t1) {
01520 bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
01521 } else {
01522 block_pitch -= t1;
01523 if (block_pitch < t2) {
01524 bl_pitch_sh2 =
01525 (s->block_conv_table[1] << 2) + (block_pitch << 1);
01526 } else {
01527 block_pitch -= t2;
01528 if (block_pitch < t3) {
01529 bl_pitch_sh2 =
01530 (s->block_conv_table[2] + block_pitch) << 2;
01531 } else
01532 bl_pitch_sh2 = s->block_conv_table[3] << 2;
01533 }
01534 }
01535 pitch[n] = bl_pitch_sh2 >> 2;
01536 break;
01537 }
01538
01539 case ACB_TYPE_ASYMMETRIC: {
01540 bl_pitch_sh2 = pitch[n] << 2;
01541 break;
01542 }
01543
01544 default:
01545 bl_pitch_sh2 = 0;
01546 break;
01547 }
01548
01549 synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
01550 lsps, prev_lsps, &frame_descs[bd_idx],
01551 &excitation[n * block_nsamples],
01552 &synth[n * block_nsamples]);
01553 }
01554
01555
01556
01557 if (s->do_apf) {
01558 double i_lsps[MAX_LSPS];
01559 float lpcs[MAX_LSPS];
01560
01561 for (n = 0; n < s->lsps; n++)
01562 i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
01563 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01564 postfilter(s, synth, samples, 80, lpcs,
01565 &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
01566 frame_descs[bd_idx].fcb_type, pitch[0]);
01567
01568 for (n = 0; n < s->lsps; n++)
01569 i_lsps[n] = cos(lsps[n]);
01570 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01571 postfilter(s, &synth[80], &samples[80], 80, lpcs,
01572 &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
01573 frame_descs[bd_idx].fcb_type, pitch[0]);
01574 } else
01575 memcpy(samples, synth, 160 * sizeof(synth[0]));
01576
01577
01578 s->frame_cntr++;
01579 if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF;
01580 s->last_acb_type = frame_descs[bd_idx].acb_type;
01581 switch (frame_descs[bd_idx].acb_type) {
01582 case ACB_TYPE_NONE:
01583 s->last_pitch_val = 0;
01584 break;
01585 case ACB_TYPE_ASYMMETRIC:
01586 s->last_pitch_val = cur_pitch_val;
01587 break;
01588 case ACB_TYPE_HAMMING:
01589 s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
01590 break;
01591 }
01592
01593 return 0;
01594 }
01595
01608 static void stabilize_lsps(double *lsps, int num)
01609 {
01610 int n, m, l;
01611
01612
01613
01614
01615 lsps[0] = FFMAX(lsps[0], 0.0015 * M_PI);
01616 for (n = 1; n < num; n++)
01617 lsps[n] = FFMAX(lsps[n], lsps[n - 1] + 0.0125 * M_PI);
01618 lsps[num - 1] = FFMIN(lsps[num - 1], 0.9985 * M_PI);
01619
01620
01621
01622 for (n = 1; n < num; n++) {
01623 if (lsps[n] < lsps[n - 1]) {
01624 for (m = 1; m < num; m++) {
01625 double tmp = lsps[m];
01626 for (l = m - 1; l >= 0; l--) {
01627 if (lsps[l] <= tmp) break;
01628 lsps[l + 1] = lsps[l];
01629 }
01630 lsps[l + 1] = tmp;
01631 }
01632 break;
01633 }
01634 }
01635 }
01636
01646 static int check_bits_for_superframe(GetBitContext *orig_gb,
01647 WMAVoiceContext *s)
01648 {
01649 GetBitContext s_gb, *gb = &s_gb;
01650 int n, need_bits, bd_idx;
01651 const struct frame_type_desc *frame_desc;
01652
01653
01654 init_get_bits(gb, orig_gb->buffer, orig_gb->size_in_bits);
01655 skip_bits_long(gb, get_bits_count(orig_gb));
01656 assert(get_bits_left(gb) == get_bits_left(orig_gb));
01657
01658
01659 if (get_bits_left(gb) < 14)
01660 return 1;
01661 if (!get_bits1(gb))
01662 return -1;
01663 if (get_bits1(gb)) skip_bits(gb, 12);
01664 if (s->has_residual_lsps) {
01665 if (get_bits_left(gb) < s->sframe_lsp_bitsize)
01666 return 1;
01667 skip_bits_long(gb, s->sframe_lsp_bitsize);
01668 }
01669
01670
01671 for (n = 0; n < MAX_FRAMES; n++) {
01672 int aw_idx_is_ext = 0;
01673
01674 if (!s->has_residual_lsps) {
01675 if (get_bits_left(gb) < s->frame_lsp_bitsize) return 1;
01676 skip_bits_long(gb, s->frame_lsp_bitsize);
01677 }
01678 bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)];
01679 if (bd_idx < 0)
01680 return -1;
01681 frame_desc = &frame_descs[bd_idx];
01682 if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01683 if (get_bits_left(gb) < s->pitch_nbits)
01684 return 1;
01685 skip_bits_long(gb, s->pitch_nbits);
01686 }
01687 if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01688 skip_bits(gb, 8);
01689 } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01690 int tmp = get_bits(gb, 6);
01691 if (tmp >= 0x36) {
01692 skip_bits(gb, 2);
01693 aw_idx_is_ext = 1;
01694 }
01695 }
01696
01697
01698 if (frame_desc->acb_type == ACB_TYPE_HAMMING) {
01699 need_bits = s->block_pitch_nbits +
01700 (frame_desc->n_blocks - 1) * s->block_delta_pitch_nbits;
01701 } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01702 need_bits = 2 * !aw_idx_is_ext;
01703 } else
01704 need_bits = 0;
01705 need_bits += frame_desc->frame_size;
01706 if (get_bits_left(gb) < need_bits)
01707 return 1;
01708 skip_bits_long(gb, need_bits);
01709 }
01710
01711 return 0;
01712 }
01713
01734 static int synth_superframe(AVCodecContext *ctx, int *got_frame_ptr)
01735 {
01736 WMAVoiceContext *s = ctx->priv_data;
01737 GetBitContext *gb = &s->gb, s_gb;
01738 int n, res, n_samples = 480;
01739 double lsps[MAX_FRAMES][MAX_LSPS];
01740 const double *mean_lsf = s->lsps == 16 ?
01741 wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];
01742 float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
01743 float synth[MAX_LSPS + MAX_SFRAMESIZE];
01744 float *samples;
01745
01746 memcpy(synth, s->synth_history,
01747 s->lsps * sizeof(*synth));
01748 memcpy(excitation, s->excitation_history,
01749 s->history_nsamples * sizeof(*excitation));
01750
01751 if (s->sframe_cache_size > 0) {
01752 gb = &s_gb;
01753 init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
01754 s->sframe_cache_size = 0;
01755 }
01756
01757 if ((res = check_bits_for_superframe(gb, s)) == 1) {
01758 *got_frame_ptr = 0;
01759 return 1;
01760 }
01761
01762
01763
01764
01765
01766 if (!get_bits1(gb)) {
01767 av_log_missing_feature(ctx, "WMAPro-in-WMAVoice support", 1);
01768 return -1;
01769 }
01770
01771
01772 if (get_bits1(gb)) {
01773 if ((n_samples = get_bits(gb, 12)) > 480) {
01774 av_log(ctx, AV_LOG_ERROR,
01775 "Superframe encodes >480 samples (%d), not allowed\n",
01776 n_samples);
01777 return -1;
01778 }
01779 }
01780
01781 if (s->has_residual_lsps) {
01782 double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
01783
01784 for (n = 0; n < s->lsps; n++)
01785 prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
01786
01787 if (s->lsps == 10) {
01788 dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01789 } else
01790 dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01791
01792 for (n = 0; n < s->lsps; n++) {
01793 lsps[0][n] = mean_lsf[n] + (a1[n] - a2[n * 2]);
01794 lsps[1][n] = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
01795 lsps[2][n] += mean_lsf[n];
01796 }
01797 for (n = 0; n < 3; n++)
01798 stabilize_lsps(lsps[n], s->lsps);
01799 }
01800
01801
01802 s->frame.nb_samples = 480;
01803 if ((res = ctx->get_buffer(ctx, &s->frame)) < 0) {
01804 av_log(ctx, AV_LOG_ERROR, "get_buffer() failed\n");
01805 return res;
01806 }
01807 s->frame.nb_samples = n_samples;
01808 samples = (float *)s->frame.data[0];
01809
01810
01811 for (n = 0; n < 3; n++) {
01812 if (!s->has_residual_lsps) {
01813 int m;
01814
01815 if (s->lsps == 10) {
01816 dequant_lsp10i(gb, lsps[n]);
01817 } else
01818 dequant_lsp16i(gb, lsps[n]);
01819
01820 for (m = 0; m < s->lsps; m++)
01821 lsps[n][m] += mean_lsf[m];
01822 stabilize_lsps(lsps[n], s->lsps);
01823 }
01824
01825 if ((res = synth_frame(ctx, gb, n,
01826 &samples[n * MAX_FRAMESIZE],
01827 lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
01828 &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
01829 &synth[s->lsps + n * MAX_FRAMESIZE]))) {
01830 *got_frame_ptr = 0;
01831 return res;
01832 }
01833 }
01834
01835
01836
01837
01838 if (get_bits1(gb)) {
01839 res = get_bits(gb, 4);
01840 skip_bits(gb, 10 * (res + 1));
01841 }
01842
01843 *got_frame_ptr = 1;
01844
01845
01846 memcpy(s->prev_lsps, lsps[2],
01847 s->lsps * sizeof(*s->prev_lsps));
01848 memcpy(s->synth_history, &synth[MAX_SFRAMESIZE],
01849 s->lsps * sizeof(*synth));
01850 memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
01851 s->history_nsamples * sizeof(*excitation));
01852 if (s->do_apf)
01853 memmove(s->zero_exc_pf, &s->zero_exc_pf[MAX_SFRAMESIZE],
01854 s->history_nsamples * sizeof(*s->zero_exc_pf));
01855
01856 return 0;
01857 }
01858
01866 static int parse_packet_header(WMAVoiceContext *s)
01867 {
01868 GetBitContext *gb = &s->gb;
01869 unsigned int res;
01870
01871 if (get_bits_left(gb) < 11)
01872 return 1;
01873 skip_bits(gb, 4);
01874 s->has_residual_lsps = get_bits1(gb);
01875 do {
01876 res = get_bits(gb, 6);
01877
01878 if (get_bits_left(gb) < 6 * (res == 0x3F) + s->spillover_bitsize)
01879 return 1;
01880 } while (res == 0x3F);
01881 s->spillover_nbits = get_bits(gb, s->spillover_bitsize);
01882
01883 return 0;
01884 }
01885
01901 static void copy_bits(PutBitContext *pb,
01902 const uint8_t *data, int size,
01903 GetBitContext *gb, int nbits)
01904 {
01905 int rmn_bytes, rmn_bits;
01906
01907 rmn_bits = rmn_bytes = get_bits_left(gb);
01908 if (rmn_bits < nbits)
01909 return;
01910 if (nbits > pb->size_in_bits - put_bits_count(pb))
01911 return;
01912 rmn_bits &= 7; rmn_bytes >>= 3;
01913 if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
01914 put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
01915 avpriv_copy_bits(pb, data + size - rmn_bytes,
01916 FFMIN(nbits - rmn_bits, rmn_bytes << 3));
01917 }
01918
01930 static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
01931 int *got_frame_ptr, AVPacket *avpkt)
01932 {
01933 WMAVoiceContext *s = ctx->priv_data;
01934 GetBitContext *gb = &s->gb;
01935 int size, res, pos;
01936
01937
01938
01939
01940
01941
01942 for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
01943 if (!size) {
01944 *got_frame_ptr = 0;
01945 return 0;
01946 }
01947 init_get_bits(&s->gb, avpkt->data, size << 3);
01948
01949
01950
01951
01952 if (size == ctx->block_align) {
01953 if ((res = parse_packet_header(s)) < 0)
01954 return res;
01955
01956
01957
01958
01959 if (s->spillover_nbits > 0) {
01960 if (s->sframe_cache_size > 0) {
01961 int cnt = get_bits_count(gb);
01962 copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);
01963 flush_put_bits(&s->pb);
01964 s->sframe_cache_size += s->spillover_nbits;
01965 if ((res = synth_superframe(ctx, got_frame_ptr)) == 0 &&
01966 *got_frame_ptr) {
01967 cnt += s->spillover_nbits;
01968 s->skip_bits_next = cnt & 7;
01969 *(AVFrame *)data = s->frame;
01970 return cnt >> 3;
01971 } else
01972 skip_bits_long (gb, s->spillover_nbits - cnt +
01973 get_bits_count(gb));
01974 } else
01975 skip_bits_long(gb, s->spillover_nbits);
01976 }
01977 } else if (s->skip_bits_next)
01978 skip_bits(gb, s->skip_bits_next);
01979
01980
01981 s->sframe_cache_size = 0;
01982 s->skip_bits_next = 0;
01983 pos = get_bits_left(gb);
01984 if ((res = synth_superframe(ctx, got_frame_ptr)) < 0) {
01985 return res;
01986 } else if (*got_frame_ptr) {
01987 int cnt = get_bits_count(gb);
01988 s->skip_bits_next = cnt & 7;
01989 *(AVFrame *)data = s->frame;
01990 return cnt >> 3;
01991 } else if ((s->sframe_cache_size = pos) > 0) {
01992
01993 init_get_bits(gb, avpkt->data, size << 3);
01994 skip_bits_long(gb, (size << 3) - pos);
01995 assert(get_bits_left(gb) == pos);
01996
01997
01998 init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
01999 copy_bits(&s->pb, avpkt->data, size, gb, s->sframe_cache_size);
02000
02001
02002 }
02003
02004 return size;
02005 }
02006
02007 static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
02008 {
02009 WMAVoiceContext *s = ctx->priv_data;
02010
02011 if (s->do_apf) {
02012 ff_rdft_end(&s->rdft);
02013 ff_rdft_end(&s->irdft);
02014 ff_dct_end(&s->dct);
02015 ff_dct_end(&s->dst);
02016 }
02017
02018 return 0;
02019 }
02020
02021 static av_cold void wmavoice_flush(AVCodecContext *ctx)
02022 {
02023 WMAVoiceContext *s = ctx->priv_data;
02024 int n;
02025
02026 s->postfilter_agc = 0;
02027 s->sframe_cache_size = 0;
02028 s->skip_bits_next = 0;
02029 for (n = 0; n < s->lsps; n++)
02030 s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
02031 memset(s->excitation_history, 0,
02032 sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
02033 memset(s->synth_history, 0,
02034 sizeof(*s->synth_history) * MAX_LSPS);
02035 memset(s->gain_pred_err, 0,
02036 sizeof(s->gain_pred_err));
02037
02038 if (s->do_apf) {
02039 memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
02040 sizeof(*s->synth_filter_out_buf) * s->lsps);
02041 memset(s->dcf_mem, 0,
02042 sizeof(*s->dcf_mem) * 2);
02043 memset(s->zero_exc_pf, 0,
02044 sizeof(*s->zero_exc_pf) * s->history_nsamples);
02045 memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
02046 }
02047 }
02048
02049 AVCodec ff_wmavoice_decoder = {
02050 .name = "wmavoice",
02051 .type = AVMEDIA_TYPE_AUDIO,
02052 .id = CODEC_ID_WMAVOICE,
02053 .priv_data_size = sizeof(WMAVoiceContext),
02054 .init = wmavoice_decode_init,
02055 .close = wmavoice_decode_end,
02056 .decode = wmavoice_decode_packet,
02057 .capabilities = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1,
02058 .flush = wmavoice_flush,
02059 .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
02060 };