FFmpeg
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
sonic.c
Go to the documentation of this file.
1 /*
2  * Simple free lossless/lossy audio codec
3  * Copyright (c) 2004 Alex Beregszaszi
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 #include "avcodec.h"
22 #include "get_bits.h"
23 #include "golomb.h"
24 #include "internal.h"
25 
26 /**
27  * @file
28  * Simple free lossless/lossy audio codec
29  * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk)
30  * Written and designed by Alex Beregszaszi
31  *
32  * TODO:
33  * - CABAC put/get_symbol
34  * - independent quantizer for channels
35  * - >2 channels support
36  * - more decorrelation types
37  * - more tap_quant tests
38  * - selectable intlist writers/readers (bonk-style, golomb, cabac)
39  */
40 
/* Upper bound on the channel count; both init functions reject streams with
 * more channels ("Only mono and stereo streams are supported"). */
41 #define MAX_CHANNELS 2
42 
/* Values of SonicContext.decorrelation, applied to interleaved sample pairs:
 *   MID_SIDE   - encoder: first += second, then second -= shift(first, 1)
 *   LEFT_SIDE  - encoder: second -= first
 *   RIGHT_SIDE - encoder: first -= second
 * The decoder applies the matching inverse operations. */
43 #define MID_SIDE 0
44 #define LEFT_SIDE 1
45 #define RIGHT_SIDE 2
46 
47 typedef struct SonicContext {
50 
52  double quantization;
53 
55 
56  int *tap_quant;
59 
60  // for encoding
61  int *tail;
62  int tail_size;
63  int *window;
65 
66  // for decoding
69 } SonicContext;
70 
/* LATTICE_SHIFT: number of bits products of predictor coefficients are shifted
 * down by (see the shift_down(..., LATTICE_SHIFT) calls).
 * SAMPLE_SHIFT: number of bits samples are scaled up by in lossy mode; the
 * decoder undoes it with shift(..., SAMPLE_SHIFT).
 * The *_FACTOR macros are the corresponding powers of two. */
71 #define LATTICE_SHIFT 10
72 #define SAMPLE_SHIFT 4
73 #define LATTICE_FACTOR (1 << LATTICE_SHIFT)
74 #define SAMPLE_FACTOR (1 << SAMPLE_SHIFT)
75 
/* Encoder rate-control constants: BASE_QUANT scales the energy estimate into
 * the quantizer; RATE_VARIATION weights the (energy2 - energy1) boost. */
76 #define BASE_QUANT 0.6
77 #define RATE_VARIATION 3.0
78 
/**
 * Divide a by b, rounding the magnitude of the quotient to the nearest
 * integer and keeping the sign of a.
 *
 * @param a dividend
 * @param b divisor; the callers in this file pass a positive quantizer
 * @return  rounded quotient
 */
static inline int divide(int a, int b)
{
    const int half = b / 2;

    return a < 0 ? -((half - a) / b)
                 :   (a + half) / b;
}
86 
/**
 * Right shift with rounding: half of 2^b is added before a is shifted
 * right by b bits.
 *
 * @param a value to scale down
 * @param b shift amount, at least 1
 */
static inline int shift(int a, int b)
{
    const int rounding = 1 << (b - 1);

    return (a + rounding) >> b;
}
91 
/**
 * Shift a right by b bits and add one to the result when a is negative.
 *
 * @param a value to scale down
 * @param b shift amount
 */
static inline int shift_down(int a, int b)
{
    int scaled = a >> b;

    if (a < 0)
        scaled++;
    return scaled;
}
96 
97 #if 1
98 static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
99 {
100  int i;
101 
102  for (i = 0; i < entries; i++)
103  set_se_golomb(pb, buf[i]);
104 
105  return 1;
106 }
107 
108 static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
109 {
110  int i;
111 
112  for (i = 0; i < entries; i++)
113  buf[i] = get_se_golomb(gb);
114 
115  return 1;
116 }
117 
118 #else
119 
120 #define ADAPT_LEVEL 8
121 
/**
 * Count the bits needed to represent x, i.e. the position of its highest
 * set bit plus one.
 *
 * @return 0 for x == 0, otherwise a value in 1..64
 */
static int bits_to_store(uint64_t x)
{
    int count;

    for (count = 0; x != 0; x >>= 1)
        count++;

    return count;
}
133 
134 static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max)
135 {
136  int i, bits;
137 
138  if (!max)
139  return;
140 
141  bits = bits_to_store(max);
142 
143  for (i = 0; i < bits-1; i++)
144  put_bits(pb, 1, value & (1 << i));
145 
146  if ( (value | (1 << (bits-1))) <= max)
147  put_bits(pb, 1, value & (1 << (bits-1)));
148 }
149 
150 static unsigned int read_uint_max(GetBitContext *gb, int max)
151 {
152  int i, bits, value = 0;
153 
154  if (!max)
155  return 0;
156 
157  bits = bits_to_store(max);
158 
159  for (i = 0; i < bits-1; i++)
160  if (get_bits1(gb))
161  value += 1 << i;
162 
163  if ( (value | (1<<(bits-1))) <= max)
164  if (get_bits1(gb))
165  value += 1 << (bits-1);
166 
167  return value;
168 }
169 
170 static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
171 {
172  int i, j, x = 0, low_bits = 0, max = 0;
173  int step = 256, pos = 0, dominant = 0, any = 0;
174  int *copy, *bits;
175 
176  copy = av_mallocz(4* entries);
177  if (!copy)
178  return -1;
179 
180  if (base_2_part)
181  {
182  int energy = 0;
183 
184  for (i = 0; i < entries; i++)
185  energy += abs(buf[i]);
186 
187  low_bits = bits_to_store(energy / (entries * 2));
188  if (low_bits > 15)
189  low_bits = 15;
190 
191  put_bits(pb, 4, low_bits);
192  }
193 
194  for (i = 0; i < entries; i++)
195  {
196  put_bits(pb, low_bits, abs(buf[i]));
197  copy[i] = abs(buf[i]) >> low_bits;
198  if (copy[i] > max)
199  max = abs(copy[i]);
200  }
201 
202  bits = av_mallocz(4* entries*max);
203  if (!bits)
204  {
205 // av_free(copy);
206  return -1;
207  }
208 
209  for (i = 0; i <= max; i++)
210  {
211  for (j = 0; j < entries; j++)
212  if (copy[j] >= i)
213  bits[x++] = copy[j] > i;
214  }
215 
216  // store bitstream
217  while (pos < x)
218  {
219  int steplet = step >> 8;
220 
221  if (pos + steplet > x)
222  steplet = x - pos;
223 
224  for (i = 0; i < steplet; i++)
225  if (bits[i+pos] != dominant)
226  any = 1;
227 
228  put_bits(pb, 1, any);
229 
230  if (!any)
231  {
232  pos += steplet;
233  step += step / ADAPT_LEVEL;
234  }
235  else
236  {
237  int interloper = 0;
238 
239  while (((pos + interloper) < x) && (bits[pos + interloper] == dominant))
240  interloper++;
241 
242  // note change
243  write_uint_max(pb, interloper, (step >> 8) - 1);
244 
245  pos += interloper + 1;
246  step -= step / ADAPT_LEVEL;
247  }
248 
249  if (step < 256)
250  {
251  step = 65536 / step;
252  dominant = !dominant;
253  }
254  }
255 
256  // store signs
257  for (i = 0; i < entries; i++)
258  if (buf[i])
259  put_bits(pb, 1, buf[i] < 0);
260 
261 // av_free(bits);
262 // av_free(copy);
263 
264  return 0;
265 }
266 
267 static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
268 {
269  int i, low_bits = 0, x = 0;
270  int n_zeros = 0, step = 256, dominant = 0;
271  int pos = 0, level = 0;
272  int *bits = av_mallocz(4* entries);
273 
274  if (!bits)
275  return -1;
276 
277  if (base_2_part)
278  {
279  low_bits = get_bits(gb, 4);
280 
281  if (low_bits)
282  for (i = 0; i < entries; i++)
283  buf[i] = get_bits(gb, low_bits);
284  }
285 
286 // av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits);
287 
288  while (n_zeros < entries)
289  {
290  int steplet = step >> 8;
291 
292  if (!get_bits1(gb))
293  {
294  for (i = 0; i < steplet; i++)
295  bits[x++] = dominant;
296 
297  if (!dominant)
298  n_zeros += steplet;
299 
300  step += step / ADAPT_LEVEL;
301  }
302  else
303  {
304  int actual_run = read_uint_max(gb, steplet-1);
305 
306 // av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run);
307 
308  for (i = 0; i < actual_run; i++)
309  bits[x++] = dominant;
310 
311  bits[x++] = !dominant;
312 
313  if (!dominant)
314  n_zeros += actual_run;
315  else
316  n_zeros++;
317 
318  step -= step / ADAPT_LEVEL;
319  }
320 
321  if (step < 256)
322  {
323  step = 65536 / step;
324  dominant = !dominant;
325  }
326  }
327 
328  // reconstruct unsigned values
329  n_zeros = 0;
330  for (i = 0; n_zeros < entries; i++)
331  {
332  while(1)
333  {
334  if (pos >= entries)
335  {
336  pos = 0;
337  level += 1 << low_bits;
338  }
339 
340  if (buf[pos] >= level)
341  break;
342 
343  pos++;
344  }
345 
346  if (bits[i])
347  buf[pos] += 1 << low_bits;
348  else
349  n_zeros++;
350 
351  pos++;
352  }
353 // av_free(bits);
354 
355  // read signs
356  for (i = 0; i < entries; i++)
357  if (buf[i] && get_bits1(gb))
358  buf[i] = -buf[i];
359 
360 // av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos);
361 
362  return 0;
363 }
364 #endif
365 
366 static void predictor_init_state(int *k, int *state, int order)
367 {
368  int i;
369 
370  for (i = order-2; i >= 0; i--)
371  {
372  int j, p, x = state[i];
373 
374  for (j = 0, p = i+1; p < order; j++,p++)
375  {
376  int tmp = x + shift_down(k[j] * state[p], LATTICE_SHIFT);
377  state[p] += shift_down(k[j]*x, LATTICE_SHIFT);
378  x = tmp;
379  }
380  }
381 }
382 
383 static int predictor_calc_error(int *k, int *state, int order, int error)
384 {
385  int i, x = error - shift_down(k[order-1] * state[order-1], LATTICE_SHIFT);
386 
387 #if 1
388  int *k_ptr = &(k[order-2]),
389  *state_ptr = &(state[order-2]);
390  for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--)
391  {
392  int k_value = *k_ptr, state_value = *state_ptr;
393  x -= shift_down(k_value * state_value, LATTICE_SHIFT);
394  state_ptr[1] = state_value + shift_down(k_value * x, LATTICE_SHIFT);
395  }
396 #else
397  for (i = order-2; i >= 0; i--)
398  {
399  x -= shift_down(k[i] * state[i], LATTICE_SHIFT);
400  state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT);
401  }
402 #endif
403 
404  // don't drift too far, to avoid overflows
405  if (x > (SAMPLE_FACTOR<<16)) x = (SAMPLE_FACTOR<<16);
406  if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16);
407 
408  state[0] = x;
409 
410  return x;
411 }
412 
413 #if CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER
414 // Heavily modified Levinson-Durbin algorithm which
415 // copes better with quantization, and calculates the
416 // actual whitened result as it goes.
417 
418 static void modified_levinson_durbin(int *window, int window_entries,
419  int *out, int out_entries, int channels, int *tap_quant)
420 {
421  int i;
422  int *state = av_mallocz(4* window_entries);
423 
424  memcpy(state, window, 4* window_entries);
425 
426  for (i = 0; i < out_entries; i++)
427  {
428  int step = (i+1)*channels, k, j;
429  double xx = 0.0, xy = 0.0;
430 #if 1
431  int *x_ptr = &(window[step]), *state_ptr = &(state[0]);
432  j = window_entries - step;
433  for (;j>=0;j--,x_ptr++,state_ptr++)
434  {
435  double x_value = *x_ptr, state_value = *state_ptr;
436  xx += state_value*state_value;
437  xy += x_value*state_value;
438  }
439 #else
440  for (j = 0; j <= (window_entries - step); j++);
441  {
442  double stepval = window[step+j], stateval = window[j];
443 // xx += (double)window[j]*(double)window[j];
444 // xy += (double)window[step+j]*(double)window[j];
445  xx += stateval*stateval;
446  xy += stepval*stateval;
447  }
448 #endif
449  if (xx == 0.0)
450  k = 0;
451  else
452  k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5));
453 
454  if (k > (LATTICE_FACTOR/tap_quant[i]))
455  k = LATTICE_FACTOR/tap_quant[i];
456  if (-k > (LATTICE_FACTOR/tap_quant[i]))
457  k = -(LATTICE_FACTOR/tap_quant[i]);
458 
459  out[i] = k;
460  k *= tap_quant[i];
461 
462 #if 1
463  x_ptr = &(window[step]);
464  state_ptr = &(state[0]);
465  j = window_entries - step;
466  for (;j>=0;j--,x_ptr++,state_ptr++)
467  {
468  int x_value = *x_ptr, state_value = *state_ptr;
469  *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT);
470  *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT);
471  }
472 #else
473  for (j=0; j <= (window_entries - step); j++)
474  {
475  int stepval = window[step+j], stateval=state[j];
476  window[step+j] += shift_down(k * stateval, LATTICE_SHIFT);
477  state[j] += shift_down(k * stepval, LATTICE_SHIFT);
478  }
479 #endif
480  }
481 
482  av_free(state);
483 }
484 
/**
 * Map a sample rate in Hz to its index code.
 *
 * @return 0..8 for the nine known rates, -1 for any other rate
 */
static inline int code_samplerate(int samplerate)
{
    static const int known_rates[] = {
        44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000
    };
    int code;

    for (code = 0; code < (int)(sizeof(known_rates) / sizeof(known_rates[0])); code++) {
        if (known_rates[code] == samplerate)
            return code;
    }
    return -1;
}
501 
502 static av_cold int sonic_encode_init(AVCodecContext *avctx)
503 {
504  SonicContext *s = avctx->priv_data;
505  PutBitContext pb;
506  int i, version = 0;
507 
508  if (avctx->channels > MAX_CHANNELS)
509  {
510  av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
511  return -1; /* only stereo or mono for now */
512  }
513 
514  if (avctx->channels == 2)
515  s->decorrelation = MID_SIDE;
516 
517  if (avctx->codec->id == AV_CODEC_ID_SONIC_LS)
518  {
519  s->lossless = 1;
520  s->num_taps = 32;
521  s->downsampling = 1;
522  s->quantization = 0.0;
523  }
524  else
525  {
526  s->num_taps = 128;
527  s->downsampling = 2;
528  s->quantization = 1.0;
529  }
530 
531  // max tap 2048
532  if ((s->num_taps < 32) || (s->num_taps > 1024) ||
533  ((s->num_taps>>5)<<5 != s->num_taps))
534  {
535  av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n");
536  return -1;
537  }
538 
539  // generate taps
540  s->tap_quant = av_mallocz(4* s->num_taps);
541  for (i = 0; i < s->num_taps; i++)
542  s->tap_quant[i] = (int)(sqrt(i+1));
543 
544  s->channels = avctx->channels;
545  s->samplerate = avctx->sample_rate;
546 
547  s->block_align = (int)(2048.0*s->samplerate/44100)/s->downsampling;
549 
550  s->tail_size = s->num_taps*s->channels;
551  s->tail = av_mallocz(4 * s->tail_size);
552  if (!s->tail)
553  return -1;
554 
555  s->predictor_k = av_mallocz(4 * s->num_taps);
556  if (!s->predictor_k)
557  return -1;
558 
559  for (i = 0; i < s->channels; i++)
560  {
561  s->coded_samples[i] = av_mallocz(4* s->block_align);
562  if (!s->coded_samples[i])
563  return -1;
564  }
565 
566  s->int_samples = av_mallocz(4* s->frame_size);
567 
568  s->window_size = ((2*s->tail_size)+s->frame_size);
569  s->window = av_mallocz(4* s->window_size);
570  if (!s->window)
571  return -1;
572 
573  avctx->extradata = av_mallocz(16);
574  if (!avctx->extradata)
575  return -1;
576  init_put_bits(&pb, avctx->extradata, 16*8);
577 
578  put_bits(&pb, 2, version); // version
579  if (version == 1)
580  {
581  put_bits(&pb, 2, s->channels);
582  put_bits(&pb, 4, code_samplerate(s->samplerate));
583  }
584  put_bits(&pb, 1, s->lossless);
585  if (!s->lossless)
586  put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision
587  put_bits(&pb, 2, s->decorrelation);
588  put_bits(&pb, 2, s->downsampling);
589  put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024
590  put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table
591 
592  flush_put_bits(&pb);
593  avctx->extradata_size = put_bits_count(&pb)/8;
594 
595  av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
596  version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
597 
598  avctx->coded_frame = avcodec_alloc_frame();
599  if (!avctx->coded_frame)
600  return AVERROR(ENOMEM);
601  avctx->coded_frame->key_frame = 1;
602  avctx->frame_size = s->block_align*s->downsampling;
603 
604  return 0;
605 }
606 
607 static av_cold int sonic_encode_close(AVCodecContext *avctx)
608 {
609  SonicContext *s = avctx->priv_data;
610  int i;
611 
612  av_freep(&avctx->coded_frame);
613 
614  for (i = 0; i < s->channels; i++)
615  av_free(s->coded_samples[i]);
616 
617  av_free(s->predictor_k);
618  av_free(s->tail);
619  av_free(s->tap_quant);
620  av_free(s->window);
621  av_free(s->int_samples);
622 
623  return 0;
624 }
625 
626 static int sonic_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
627  const AVFrame *frame, int *got_packet_ptr)
628 {
629  SonicContext *s = avctx->priv_data;
630  PutBitContext pb;
631  int i, j, ch, quant = 0, x = 0;
632  int ret;
633  const short *samples = (const int16_t*)frame->data[0];
634 
635  if ((ret = ff_alloc_packet2(avctx, avpkt, s->frame_size * 5 + 1000)))
636  return ret;
637 
638  init_put_bits(&pb, avpkt->data, avpkt->size);
639 
640  // short -> internal
641  for (i = 0; i < s->frame_size; i++)
642  s->int_samples[i] = samples[i];
643 
644  if (!s->lossless)
645  for (i = 0; i < s->frame_size; i++)
646  s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT;
647 
648  switch(s->decorrelation)
649  {
650  case MID_SIDE:
651  for (i = 0; i < s->frame_size; i += s->channels)
652  {
653  s->int_samples[i] += s->int_samples[i+1];
654  s->int_samples[i+1] -= shift(s->int_samples[i], 1);
655  }
656  break;
657  case LEFT_SIDE:
658  for (i = 0; i < s->frame_size; i += s->channels)
659  s->int_samples[i+1] -= s->int_samples[i];
660  break;
661  case RIGHT_SIDE:
662  for (i = 0; i < s->frame_size; i += s->channels)
663  s->int_samples[i] -= s->int_samples[i+1];
664  break;
665  }
666 
667  memset(s->window, 0, 4* s->window_size);
668 
669  for (i = 0; i < s->tail_size; i++)
670  s->window[x++] = s->tail[i];
671 
672  for (i = 0; i < s->frame_size; i++)
673  s->window[x++] = s->int_samples[i];
674 
675  for (i = 0; i < s->tail_size; i++)
676  s->window[x++] = 0;
677 
678  for (i = 0; i < s->tail_size; i++)
679  s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i];
680 
681  // generate taps
682  modified_levinson_durbin(s->window, s->window_size,
683  s->predictor_k, s->num_taps, s->channels, s->tap_quant);
684  if (intlist_write(&pb, s->predictor_k, s->num_taps, 0) < 0)
685  return -1;
686 
687  for (ch = 0; ch < s->channels; ch++)
688  {
689  x = s->tail_size+ch;
690  for (i = 0; i < s->block_align; i++)
691  {
692  int sum = 0;
693  for (j = 0; j < s->downsampling; j++, x += s->channels)
694  sum += s->window[x];
695  s->coded_samples[ch][i] = sum;
696  }
697  }
698 
699  // simple rate control code
700  if (!s->lossless)
701  {
702  double energy1 = 0.0, energy2 = 0.0;
703  for (ch = 0; ch < s->channels; ch++)
704  {
705  for (i = 0; i < s->block_align; i++)
706  {
707  double sample = s->coded_samples[ch][i];
708  energy2 += sample*sample;
709  energy1 += fabs(sample);
710  }
711  }
712 
713  energy2 = sqrt(energy2/(s->channels*s->block_align));
714  energy1 = sqrt(2.0)*energy1/(s->channels*s->block_align);
715 
716  // increase bitrate when samples are like a gaussian distribution
717  // reduce bitrate when samples are like a two-tailed exponential distribution
718 
719  if (energy2 > energy1)
720  energy2 += (energy2-energy1)*RATE_VARIATION;
721 
722  quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR);
723 // av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2);
724 
725  if (quant < 1)
726  quant = 1;
727  if (quant > 65534)
728  quant = 65534;
729 
730  set_ue_golomb(&pb, quant);
731 
732  quant *= SAMPLE_FACTOR;
733  }
734 
735  // write out coded samples
736  for (ch = 0; ch < s->channels; ch++)
737  {
738  if (!s->lossless)
739  for (i = 0; i < s->block_align; i++)
740  s->coded_samples[ch][i] = divide(s->coded_samples[ch][i], quant);
741 
742  if (intlist_write(&pb, s->coded_samples[ch], s->block_align, 1) < 0)
743  return -1;
744  }
745 
746 // av_log(avctx, AV_LOG_DEBUG, "used bytes: %d\n", (put_bits_count(&pb)+7)/8);
747 
748  flush_put_bits(&pb);
749  avpkt->size = (put_bits_count(&pb)+7)/8;
750  *got_packet_ptr = 1;
751  return 0;
752 }
753 #endif /* CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER */
754 
755 #if CONFIG_SONIC_DECODER
/* Sample rates in Hz, indexed by the code read from version 1 headers;
 * the order matches code_samplerate(). */
static const int samplerate_table[] = {
    44100, 22050, 11025,
    96000, 48000, 32000,
    24000, 16000,  8000,
};
758 
759 static av_cold int sonic_decode_init(AVCodecContext *avctx)
760 {
761  SonicContext *s = avctx->priv_data;
762  GetBitContext gb;
763  int i, version;
764 
765  s->channels = avctx->channels;
766  s->samplerate = avctx->sample_rate;
767 
769  avctx->coded_frame = &s->frame;
770 
771  if (!avctx->extradata)
772  {
773  av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n");
774  return -1;
775  }
776 
777  init_get_bits(&gb, avctx->extradata, avctx->extradata_size);
778 
779  version = get_bits(&gb, 2);
780  if (version > 1)
781  {
782  av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n");
783  return -1;
784  }
785 
786  if (version == 1)
787  {
788  s->channels = get_bits(&gb, 2);
789  s->samplerate = samplerate_table[get_bits(&gb, 4)];
790  av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n",
791  s->channels, s->samplerate);
792  }
793 
794  if (s->channels > MAX_CHANNELS)
795  {
796  av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
797  return -1;
798  }
799 
800  s->lossless = get_bits1(&gb);
801  if (!s->lossless)
802  skip_bits(&gb, 3); // XXX FIXME
803  s->decorrelation = get_bits(&gb, 2);
804 
805  s->downsampling = get_bits(&gb, 2);
806  if (!s->downsampling) {
807  av_log(avctx, AV_LOG_ERROR, "invalid downsampling value\n");
808  return AVERROR_INVALIDDATA;
809  }
810 
811  s->num_taps = (get_bits(&gb, 5)+1)<<5;
812  if (get_bits1(&gb)) // XXX FIXME
813  av_log(avctx, AV_LOG_INFO, "Custom quant table\n");
814 
815  s->block_align = (int)(2048.0*s->samplerate/44100)/s->downsampling;
817 // avctx->frame_size = s->block_align;
818 
819  av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
820  version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
821 
822  // generate taps
823  s->tap_quant = av_mallocz(4* s->num_taps);
824  for (i = 0; i < s->num_taps; i++)
825  s->tap_quant[i] = (int)(sqrt(i+1));
826 
827  s->predictor_k = av_mallocz(4* s->num_taps);
828 
829  for (i = 0; i < s->channels; i++)
830  {
831  s->predictor_state[i] = av_mallocz(4* s->num_taps);
832  if (!s->predictor_state[i])
833  return -1;
834  }
835 
836  for (i = 0; i < s->channels; i++)
837  {
838  s->coded_samples[i] = av_mallocz(4* s->block_align);
839  if (!s->coded_samples[i])
840  return -1;
841  }
842  s->int_samples = av_mallocz(4* s->frame_size);
843 
844  avctx->sample_fmt = AV_SAMPLE_FMT_S16;
845  return 0;
846 }
847 
848 static av_cold int sonic_decode_close(AVCodecContext *avctx)
849 {
850  SonicContext *s = avctx->priv_data;
851  int i;
852 
853  av_free(s->int_samples);
854  av_free(s->tap_quant);
855  av_free(s->predictor_k);
856 
857  for (i = 0; i < s->channels; i++)
858  {
859  av_free(s->predictor_state[i]);
860  av_free(s->coded_samples[i]);
861  }
862 
863  return 0;
864 }
865 
866 static int sonic_decode_frame(AVCodecContext *avctx,
867  void *data, int *got_frame_ptr,
868  AVPacket *avpkt)
869 {
870  const uint8_t *buf = avpkt->data;
871  int buf_size = avpkt->size;
872  SonicContext *s = avctx->priv_data;
873  GetBitContext gb;
874  int i, quant, ch, j, ret;
875  int16_t *samples;
876 
877  if (buf_size == 0) return 0;
878 
879  s->frame.nb_samples = s->frame_size;
880  if ((ret = ff_get_buffer(avctx, &s->frame)) < 0) {
881  av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
882  return ret;
883  }
884  samples = (int16_t *)s->frame.data[0];
885 
886 // av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size);
887 
888  init_get_bits(&gb, buf, buf_size*8);
889 
890  intlist_read(&gb, s->predictor_k, s->num_taps, 0);
891 
892  // dequantize
893  for (i = 0; i < s->num_taps; i++)
894  s->predictor_k[i] *= s->tap_quant[i];
895 
896  if (s->lossless)
897  quant = 1;
898  else
899  quant = get_ue_golomb(&gb) * SAMPLE_FACTOR;
900 
901 // av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant);
902 
903  for (ch = 0; ch < s->channels; ch++)
904  {
905  int x = ch;
906 
908 
909  intlist_read(&gb, s->coded_samples[ch], s->block_align, 1);
910 
911  for (i = 0; i < s->block_align; i++)
912  {
913  for (j = 0; j < s->downsampling - 1; j++)
914  {
916  x += s->channels;
917  }
918 
919  s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * quant);
920  x += s->channels;
921  }
922 
923  for (i = 0; i < s->num_taps; i++)
924  s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
925  }
926 
927  switch(s->decorrelation)
928  {
929  case MID_SIDE:
930  for (i = 0; i < s->frame_size; i += s->channels)
931  {
932  s->int_samples[i+1] += shift(s->int_samples[i], 1);
933  s->int_samples[i] -= s->int_samples[i+1];
934  }
935  break;
936  case LEFT_SIDE:
937  for (i = 0; i < s->frame_size; i += s->channels)
938  s->int_samples[i+1] += s->int_samples[i];
939  break;
940  case RIGHT_SIDE:
941  for (i = 0; i < s->frame_size; i += s->channels)
942  s->int_samples[i] += s->int_samples[i+1];
943  break;
944  }
945 
946  if (!s->lossless)
947  for (i = 0; i < s->frame_size; i++)
948  s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);
949 
950  // internal -> short
951  for (i = 0; i < s->frame_size; i++)
952  samples[i] = av_clip_int16(s->int_samples[i]);
953 
954  align_get_bits(&gb);
955 
956  *got_frame_ptr = 1;
957  *(AVFrame*)data = s->frame;
958 
959  return (get_bits_count(&gb)+7)/8;
960 }
961 
962 AVCodec ff_sonic_decoder = {
963  .name = "sonic",
964  .type = AVMEDIA_TYPE_AUDIO,
965  .id = AV_CODEC_ID_SONIC,
966  .priv_data_size = sizeof(SonicContext),
967  .init = sonic_decode_init,
968  .close = sonic_decode_close,
969  .decode = sonic_decode_frame,
970  .capabilities = CODEC_CAP_DR1 | CODEC_CAP_EXPERIMENTAL,
971  .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
972 };
973 #endif /* CONFIG_SONIC_DECODER */
974 
975 #if CONFIG_SONIC_ENCODER
976 AVCodec ff_sonic_encoder = {
977  .name = "sonic",
978  .type = AVMEDIA_TYPE_AUDIO,
979  .id = AV_CODEC_ID_SONIC,
980  .priv_data_size = sizeof(SonicContext),
981  .init = sonic_encode_init,
982  .encode2 = sonic_encode_frame,
983  .capabilities = CODEC_CAP_EXPERIMENTAL,
984  .close = sonic_encode_close,
985  .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
986 };
987 #endif
988 
989 #if CONFIG_SONIC_LS_ENCODER
990 AVCodec ff_sonic_ls_encoder = {
991  .name = "sonicls",
992  .type = AVMEDIA_TYPE_AUDIO,
993  .id = AV_CODEC_ID_SONIC_LS,
994  .priv_data_size = sizeof(SonicContext),
995  .init = sonic_encode_init,
996  .encode2 = sonic_encode_frame,
997  .capabilities = CODEC_CAP_EXPERIMENTAL,
998  .close = sonic_encode_close,
999  .long_name = NULL_IF_CONFIG_SMALL("Sonic lossless"),
1000 };
1001 #endif