00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include "libavutil/channel_layout.h"
00023 #include "libavutil/intreadwrite.h"
00024 #include "avcodec.h"
00025 #include "dsputil.h"
00026 #include "get_bits.h"
00027 #include "internal.h"
00028
00029 #include "truespeech_data.h"
00038 typedef struct {
00039 AVFrame frame;
00040 DSPContext dsp;
00041
00042 DECLARE_ALIGNED(16, uint8_t, buffer)[32];
00043 int16_t vector[8];
00044 int offset1[2];
00045 int offset2[4];
00046 int pulseoff[4];
00047 int pulsepos[4];
00048 int pulseval[4];
00049 int flag;
00050
00051 int filtbuf[146];
00052 int prevfilt[8];
00053 int16_t tmp1[8];
00054 int16_t tmp2[8];
00055 int16_t tmp3[8];
00056 int16_t cvector[8];
00057 int filtval;
00058 int16_t newvec[60];
00059 int16_t filters[32];
00060 } TSContext;
00061
00062 static av_cold int truespeech_decode_init(AVCodecContext * avctx)
00063 {
00064 TSContext *c = avctx->priv_data;
00065
00066 if (avctx->channels != 1) {
00067 av_log_ask_for_sample(avctx, "Unsupported channel count: %d\n", avctx->channels);
00068 return AVERROR(EINVAL);
00069 }
00070
00071 avctx->channel_layout = AV_CH_LAYOUT_MONO;
00072 avctx->sample_fmt = AV_SAMPLE_FMT_S16;
00073
00074 ff_dsputil_init(&c->dsp, avctx);
00075
00076 avcodec_get_frame_defaults(&c->frame);
00077 avctx->coded_frame = &c->frame;
00078
00079 return 0;
00080 }
00081
00082 static void truespeech_read_frame(TSContext *dec, const uint8_t *input)
00083 {
00084 GetBitContext gb;
00085
00086 dec->dsp.bswap_buf((uint32_t *)dec->buffer, (const uint32_t *)input, 8);
00087 init_get_bits(&gb, dec->buffer, 32 * 8);
00088
00089 dec->vector[7] = ts_codebook[7][get_bits(&gb, 3)];
00090 dec->vector[6] = ts_codebook[6][get_bits(&gb, 3)];
00091 dec->vector[5] = ts_codebook[5][get_bits(&gb, 3)];
00092 dec->vector[4] = ts_codebook[4][get_bits(&gb, 4)];
00093 dec->vector[3] = ts_codebook[3][get_bits(&gb, 4)];
00094 dec->vector[2] = ts_codebook[2][get_bits(&gb, 4)];
00095 dec->vector[1] = ts_codebook[1][get_bits(&gb, 5)];
00096 dec->vector[0] = ts_codebook[0][get_bits(&gb, 5)];
00097 dec->flag = get_bits1(&gb);
00098
00099 dec->offset1[0] = get_bits(&gb, 4) << 4;
00100 dec->offset2[3] = get_bits(&gb, 7);
00101 dec->offset2[2] = get_bits(&gb, 7);
00102 dec->offset2[1] = get_bits(&gb, 7);
00103 dec->offset2[0] = get_bits(&gb, 7);
00104
00105 dec->offset1[1] = get_bits(&gb, 4);
00106 dec->pulseval[1] = get_bits(&gb, 14);
00107 dec->pulseval[0] = get_bits(&gb, 14);
00108
00109 dec->offset1[1] |= get_bits(&gb, 4) << 4;
00110 dec->pulseval[3] = get_bits(&gb, 14);
00111 dec->pulseval[2] = get_bits(&gb, 14);
00112
00113 dec->offset1[0] |= get_bits1(&gb);
00114 dec->pulsepos[0] = get_bits_long(&gb, 27);
00115 dec->pulseoff[0] = get_bits(&gb, 4);
00116
00117 dec->offset1[0] |= get_bits1(&gb) << 1;
00118 dec->pulsepos[1] = get_bits_long(&gb, 27);
00119 dec->pulseoff[1] = get_bits(&gb, 4);
00120
00121 dec->offset1[0] |= get_bits1(&gb) << 2;
00122 dec->pulsepos[2] = get_bits_long(&gb, 27);
00123 dec->pulseoff[2] = get_bits(&gb, 4);
00124
00125 dec->offset1[0] |= get_bits1(&gb) << 3;
00126 dec->pulsepos[3] = get_bits_long(&gb, 27);
00127 dec->pulseoff[3] = get_bits(&gb, 4);
00128 }
00129
00130 static void truespeech_correlate_filter(TSContext *dec)
00131 {
00132 int16_t tmp[8];
00133 int i, j;
00134
00135 for(i = 0; i < 8; i++){
00136 if(i > 0){
00137 memcpy(tmp, dec->cvector, i * sizeof(*tmp));
00138 for(j = 0; j < i; j++)
00139 dec->cvector[j] = ((tmp[i - j - 1] * dec->vector[i]) +
00140 (dec->cvector[j] << 15) + 0x4000) >> 15;
00141 }
00142 dec->cvector[i] = (8 - dec->vector[i]) >> 3;
00143 }
00144 for(i = 0; i < 8; i++)
00145 dec->cvector[i] = (dec->cvector[i] * ts_decay_994_1000[i]) >> 15;
00146
00147 dec->filtval = dec->vector[0];
00148 }
00149
00150 static void truespeech_filters_merge(TSContext *dec)
00151 {
00152 int i;
00153
00154 if(!dec->flag){
00155 for(i = 0; i < 8; i++){
00156 dec->filters[i + 0] = dec->prevfilt[i];
00157 dec->filters[i + 8] = dec->prevfilt[i];
00158 }
00159 }else{
00160 for(i = 0; i < 8; i++){
00161 dec->filters[i + 0]=(dec->cvector[i] * 21846 + dec->prevfilt[i] * 10923 + 16384) >> 15;
00162 dec->filters[i + 8]=(dec->cvector[i] * 10923 + dec->prevfilt[i] * 21846 + 16384) >> 15;
00163 }
00164 }
00165 for(i = 0; i < 8; i++){
00166 dec->filters[i + 16] = dec->cvector[i];
00167 dec->filters[i + 24] = dec->cvector[i];
00168 }
00169 }
00170
00171 static void truespeech_apply_twopoint_filter(TSContext *dec, int quart)
00172 {
00173 int16_t tmp[146 + 60], *ptr0, *ptr1;
00174 const int16_t *filter;
00175 int i, t, off;
00176
00177 t = dec->offset2[quart];
00178 if(t == 127){
00179 memset(dec->newvec, 0, 60 * sizeof(*dec->newvec));
00180 return;
00181 }
00182 for(i = 0; i < 146; i++)
00183 tmp[i] = dec->filtbuf[i];
00184 off = (t / 25) + dec->offset1[quart >> 1] + 18;
00185 off = av_clip(off, 0, 145);
00186 ptr0 = tmp + 145 - off;
00187 ptr1 = tmp + 146;
00188 filter = ts_order2_coeffs + (t % 25) * 2;
00189 for(i = 0; i < 60; i++){
00190 t = (ptr0[0] * filter[0] + ptr0[1] * filter[1] + 0x2000) >> 14;
00191 ptr0++;
00192 dec->newvec[i] = t;
00193 ptr1[i] = t;
00194 }
00195 }
00196
00197 static void truespeech_place_pulses(TSContext *dec, int16_t *out, int quart)
00198 {
00199 int16_t tmp[7];
00200 int i, j, t;
00201 const int16_t *ptr1;
00202 int16_t *ptr2;
00203 int coef;
00204
00205 memset(out, 0, 60 * sizeof(*out));
00206 for(i = 0; i < 7; i++) {
00207 t = dec->pulseval[quart] & 3;
00208 dec->pulseval[quart] >>= 2;
00209 tmp[6 - i] = ts_pulse_scales[dec->pulseoff[quart] * 4 + t];
00210 }
00211
00212 coef = dec->pulsepos[quart] >> 15;
00213 ptr1 = ts_pulse_values + 30;
00214 ptr2 = tmp;
00215 for(i = 0, j = 3; (i < 30) && (j > 0); i++){
00216 t = *ptr1++;
00217 if(coef >= t)
00218 coef -= t;
00219 else{
00220 out[i] = *ptr2++;
00221 ptr1 += 30;
00222 j--;
00223 }
00224 }
00225 coef = dec->pulsepos[quart] & 0x7FFF;
00226 ptr1 = ts_pulse_values;
00227 for(i = 30, j = 4; (i < 60) && (j > 0); i++){
00228 t = *ptr1++;
00229 if(coef >= t)
00230 coef -= t;
00231 else{
00232 out[i] = *ptr2++;
00233 ptr1 += 30;
00234 j--;
00235 }
00236 }
00237
00238 }
00239
00240 static void truespeech_update_filters(TSContext *dec, int16_t *out, int quart)
00241 {
00242 int i;
00243
00244 memmove(dec->filtbuf, &dec->filtbuf[60], 86 * sizeof(*dec->filtbuf));
00245 for(i = 0; i < 60; i++){
00246 dec->filtbuf[i + 86] = out[i] + dec->newvec[i] - (dec->newvec[i] >> 3);
00247 out[i] += dec->newvec[i];
00248 }
00249 }
00250
00251 static void truespeech_synth(TSContext *dec, int16_t *out, int quart)
00252 {
00253 int i,k;
00254 int t[8];
00255 int16_t *ptr0, *ptr1;
00256
00257 ptr0 = dec->tmp1;
00258 ptr1 = dec->filters + quart * 8;
00259 for(i = 0; i < 60; i++){
00260 int sum = 0;
00261 for(k = 0; k < 8; k++)
00262 sum += ptr0[k] * ptr1[k];
00263 sum = (sum + (out[i] << 12) + 0x800) >> 12;
00264 out[i] = av_clip(sum, -0x7FFE, 0x7FFE);
00265 for(k = 7; k > 0; k--)
00266 ptr0[k] = ptr0[k - 1];
00267 ptr0[0] = out[i];
00268 }
00269
00270 for(i = 0; i < 8; i++)
00271 t[i] = (ts_decay_35_64[i] * ptr1[i]) >> 15;
00272
00273 ptr0 = dec->tmp2;
00274 for(i = 0; i < 60; i++){
00275 int sum = 0;
00276 for(k = 0; k < 8; k++)
00277 sum += ptr0[k] * t[k];
00278 for(k = 7; k > 0; k--)
00279 ptr0[k] = ptr0[k - 1];
00280 ptr0[0] = out[i];
00281 out[i] = ((out[i] << 12) - sum) >> 12;
00282 }
00283
00284 for(i = 0; i < 8; i++)
00285 t[i] = (ts_decay_3_4[i] * ptr1[i]) >> 15;
00286
00287 ptr0 = dec->tmp3;
00288 for(i = 0; i < 60; i++){
00289 int sum = out[i] << 12;
00290 for(k = 0; k < 8; k++)
00291 sum += ptr0[k] * t[k];
00292 for(k = 7; k > 0; k--)
00293 ptr0[k] = ptr0[k - 1];
00294 ptr0[0] = av_clip((sum + 0x800) >> 12, -0x7FFE, 0x7FFE);
00295
00296 sum = ((ptr0[1] * (dec->filtval - (dec->filtval >> 2))) >> 4) + sum;
00297 sum = sum - (sum >> 3);
00298 out[i] = av_clip((sum + 0x800) >> 12, -0x7FFE, 0x7FFE);
00299 }
00300 }
00301
00302 static void truespeech_save_prevvec(TSContext *c)
00303 {
00304 int i;
00305
00306 for(i = 0; i < 8; i++)
00307 c->prevfilt[i] = c->cvector[i];
00308 }
00309
00310 static int truespeech_decode_frame(AVCodecContext *avctx, void *data,
00311 int *got_frame_ptr, AVPacket *avpkt)
00312 {
00313 const uint8_t *buf = avpkt->data;
00314 int buf_size = avpkt->size;
00315 TSContext *c = avctx->priv_data;
00316
00317 int i, j;
00318 int16_t *samples;
00319 int iterations, ret;
00320
00321 iterations = buf_size / 32;
00322
00323 if (!iterations) {
00324 av_log(avctx, AV_LOG_ERROR,
00325 "Too small input buffer (%d bytes), need at least 32 bytes\n", buf_size);
00326 return -1;
00327 }
00328
00329
00330 c->frame.nb_samples = iterations * 240;
00331 if ((ret = ff_get_buffer(avctx, &c->frame)) < 0) {
00332 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
00333 return ret;
00334 }
00335 samples = (int16_t *)c->frame.data[0];
00336
00337 memset(samples, 0, iterations * 240 * sizeof(*samples));
00338
00339 for(j = 0; j < iterations; j++) {
00340 truespeech_read_frame(c, buf);
00341 buf += 32;
00342
00343 truespeech_correlate_filter(c);
00344 truespeech_filters_merge(c);
00345
00346 for(i = 0; i < 4; i++) {
00347 truespeech_apply_twopoint_filter(c, i);
00348 truespeech_place_pulses (c, samples, i);
00349 truespeech_update_filters(c, samples, i);
00350 truespeech_synth (c, samples, i);
00351 samples += 60;
00352 }
00353
00354 truespeech_save_prevvec(c);
00355 }
00356
00357 *got_frame_ptr = 1;
00358 *(AVFrame *)data = c->frame;
00359
00360 return buf_size;
00361 }
00362
00363 AVCodec ff_truespeech_decoder = {
00364 .name = "truespeech",
00365 .type = AVMEDIA_TYPE_AUDIO,
00366 .id = AV_CODEC_ID_TRUESPEECH,
00367 .priv_data_size = sizeof(TSContext),
00368 .init = truespeech_decode_init,
00369 .decode = truespeech_decode_frame,
00370 .capabilities = CODEC_CAP_DR1,
00371 .long_name = NULL_IF_CONFIG_SMALL("DSP Group TrueSpeech"),
00372 };