FFmpeg
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
alac.c
Go to the documentation of this file.
1 /*
2  * ALAC (Apple Lossless Audio Codec) decoder
3  * Copyright (c) 2005 David Hammerton
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * ALAC (Apple Lossless Audio Codec) decoder
25  * @author 2005 David Hammerton
26  * @see http://crazney.net/programs/itunes/alac.html
27  *
28  * Note: This decoder expects a 36-byte QuickTime atom to be
29  * passed through the extradata[_size] fields. This atom is tacked onto
30  * the end of an 'alac' stsd atom and has the following format:
31  *
32  * 32bit atom size
33  * 32bit tag ("alac")
34  * 32bit tag version (0)
35  * 32bit samples per frame (used when not set explicitly in the frames)
36  * 8bit compatible version (0)
37  * 8bit sample size
38  * 8bit history mult (40)
39  * 8bit initial history (14)
40  * 8bit rice param limit (10)
41  * 8bit channels
42  * 16bit maxRun (255)
43  * 32bit max coded frame size (0 means unknown)
44  * 32bit average bitrate (0 means unknown)
45  * 32bit samplerate
46  */
47 
49 #include "avcodec.h"
50 #include "get_bits.h"
51 #include "bytestream.h"
52 #include "internal.h"
53 #include "unary.h"
54 #include "mathops.h"
55 #include "alac_data.h"
56 
57 #define ALAC_EXTRADATA_SIZE 36
58 
/*
 * Private decoder state for the ALAC decoder.
 *
 * NOTE(review): the embedded listing numbers jump (59 -> 63, 68 -> 74,
 * 77 -> 79), so some members appear to be missing from this listing. Other
 * code in this file accesses alac->avctx, alac->frame, alac->gb,
 * alac->max_samples_per_frame, alac->sample_size, alac->rice_history_mult,
 * alac->rice_initial_history, alac->rice_limit and alac->direct_output;
 * confirm their declarations against the original file.
 */
59 typedef struct {
/* channel count; set from the extradata or from avctx->channels during init */
63  int channels;
64 

/* Per-channel work buffers; only FFMIN(channels, 2) entries are allocated. */
/* destination of rice_decompress() for each channel */
65  int32_t *predict_error_buffer[2];
/* decoded samples of each channel before they are written to the frame */
66  int32_t *output_samples_buffer[2];
/* raw low-order bits that are merged back in after prediction */
67  int32_t *extra_bits_buffer[2];
68 

74 

75  int extra_bits; /**< number of extra bits beyond 16-bit */
76  int nb_samples; /**< number of samples in the current frame */
77 

79 } ALACContext;
80 
81 static inline unsigned int decode_scalar(GetBitContext *gb, int k, int bps)
82 {
83  unsigned int x = get_unary_0_9(gb);
84 
85  if (x > 8) { /* RICE THRESHOLD */
86  /* use alternative encoding */
87  x = get_bits_long(gb, bps);
88  } else if (k != 1) {
89  int extrabits = show_bits(gb, k);
90 
91  /* multiply x by 2^k - 1, as part of their strange algorithm */
92  x = (x << k) - x;
93 
94  if (extrabits > 1) {
95  x += extrabits - 1;
96  skip_bits(gb, k);
97  } else
98  skip_bits(gb, k - 1);
99  }
100  return x;
101 }
102 
103 static int rice_decompress(ALACContext *alac, int32_t *output_buffer,
104  int nb_samples, int bps, int rice_history_mult)
105 {
106  int i;
107  unsigned int history = alac->rice_initial_history;
108  int sign_modifier = 0;
109 
110  for (i = 0; i < nb_samples; i++) {
111  int k;
112  unsigned int x;
113 
114  if(get_bits_left(&alac->gb) <= 0)
115  return -1;
116 
117  /* calculate rice param and decode next value */
118  k = av_log2((history >> 9) + 3);
119  k = FFMIN(k, alac->rice_limit);
120  x = decode_scalar(&alac->gb, k, bps);
121  x += sign_modifier;
122  sign_modifier = 0;
123  output_buffer[i] = (x >> 1) ^ -(x & 1);
124 
125  /* update the history */
126  if (x > 0xffff)
127  history = 0xffff;
128  else
129  history += x * rice_history_mult -
130  ((history * rice_history_mult) >> 9);
131 
132  /* special case: there may be compressed blocks of 0 */
133  if ((history < 128) && (i + 1 < nb_samples)) {
134  int block_size;
135 
136  /* calculate rice param and decode block size */
137  k = 7 - av_log2(history) + ((history + 16) >> 6);
138  k = FFMIN(k, alac->rice_limit);
139  block_size = decode_scalar(&alac->gb, k, 16);
140 
141  if (block_size > 0) {
142  if (block_size >= nb_samples - i) {
143  av_log(alac->avctx, AV_LOG_ERROR,
144  "invalid zero block size of %d %d %d\n", block_size,
145  nb_samples, i);
146  block_size = nb_samples - i - 1;
147  }
148  memset(&output_buffer[i + 1], 0,
149  block_size * sizeof(*output_buffer));
150  i += block_size;
151  }
152  if (block_size <= 0xffff)
153  sign_modifier = 1;
154  history = 0;
155  }
156  }
157  return 0;
158 }
159 
/**
 * Return the sign of v as an integer.
 *
 * @return 1 for positive v, -1 for negative v and 0 for zero
 */
static inline int sign_only(int v)
{
    return (v > 0) - (v < 0);
}
164 
/**
 * Reconstruct samples from prediction residuals with an adaptive FIR
 * predictor.
 *
 * @param error_buffer residuals, nb_samples entries
 * @param buffer_out   reconstructed samples, nb_samples entries; may be the
 *                     same buffer as error_buffer
 * @param nb_samples   number of samples to process
 * @param bps          bit width the results are sign-extended from
 * @param lpc_coefs    lpc_order predictor coefficients, adapted in place;
 *                     not accessed when lpc_order is 0 or 31
 * @param lpc_order    number of coefficients; 0 copies the residuals and
 *                     31 selects plain first-order prediction
 * @param lpc_quant    right shift applied to the predictor sum
 */
static void lpc_prediction(int32_t *error_buffer, int32_t *buffer_out,
                           int nb_samples, int bps, int16_t *lpc_coefs,
                           int lpc_order, int lpc_quant)
{
    int i;
    int32_t *pred = buffer_out;
    /* lpc_quant comes from a 4-bit bitstream field and may be 0; a shift
     * count of -1 would be undefined, so use no rounding offset then */
    const int rounding = lpc_quant > 0 ? 1 << (lpc_quant - 1) : 0;

    /* first sample always copies */
    *buffer_out = *error_buffer;

    if (nb_samples <= 1)
        return;

    if (!lpc_order) {
        memcpy(&buffer_out[1], &error_buffer[1],
               (nb_samples - 1) * sizeof(*buffer_out));
        return;
    }

    if (lpc_order == 31) {
        /* simple 1st-order prediction */
        for (i = 1; i < nb_samples; i++) {
            buffer_out[i] = sign_extend(buffer_out[i - 1] + error_buffer[i],
                                        bps);
        }
        return;
    }

    /* read warm-up samples */
    for (i = 1; i <= lpc_order && i < nb_samples; i++)
        buffer_out[i] = sign_extend(buffer_out[i - 1] + error_buffer[i], bps);

    /* NOTE: 4 and 8 are very common cases that could be optimized. */

    for (; i < nb_samples; i++) {
        int j;
        int val = 0;
        int error_val = error_buffer[i];
        int error_sign;
        /* d is the oldest sample of the window; pred then points at the
         * lpc_order samples that follow it */
        int d = *pred++;

        /* LPC prediction */
        for (j = 0; j < lpc_order; j++)
            val += (pred[j] - d) * lpc_coefs[j];
        val = (val + rounding) >> lpc_quant;
        val += d + error_val;
        buffer_out[i] = sign_extend(val, bps);

        /* adapt LPC coefficients */
        error_sign = sign_only(error_val);
        if (error_sign) {
            /* stop as soon as the remaining error is used up or flips sign */
            for (j = 0; j < lpc_order && error_val * error_sign > 0; j++) {
                int sign;
                val = d - pred[j];
                sign = sign_only(val) * error_sign;
                lpc_coefs[j] -= sign;
                val *= sign;
                error_val -= (val >> lpc_quant) * (j + 1);
            }
        }
    }
}
227 
229  int decorr_shift, int decorr_left_weight)
230 {
/*
 * NOTE(review): the first line of this function's signature (listing line
 * 228) is missing from this listing; the body uses `buffer` and `nb_samples`,
 * which are presumably declared there.
 *
 * For each sample index the loop rewrites the pair (buffer[0][i],
 * buffer[1][i]) in place: a weighted, shifted share of the second value is
 * subtracted from the first, the result is added to the second, and the two
 * results are stored swapped.
 */
231  int i;
232 
233  for (i = 0; i < nb_samples; i++) {
234  int32_t a, b;
235 
236  a = buffer[0][i];
237  b = buffer[1][i];
238 
239  a -= (b * decorr_left_weight) >> decorr_shift;
240  b += a;
241 
/* note the swap: buffer[0] receives b, buffer[1] receives a */
242  buffer[0][i] = b;
243  buffer[1][i] = a;
244  }
245 }
246 
/**
 * Merge the separately transmitted low-order bits back into the samples.
 *
 * Every sample is shifted left by extra_bits and OR-ed with the matching
 * entry of extra_bits_buffer.
 *
 * @param buffer            per-channel sample buffers, modified in place
 * @param extra_bits_buffer per-channel low-order bits
 * @param extra_bits        number of bits to shift each sample by
 * @param channels          number of channels to process (at most 2)
 * @param nb_samples        number of samples per channel
 */
static void append_extra_bits(int32_t *buffer[2], int32_t *extra_bits_buffer[2],
                              int extra_bits, int channels, int nb_samples)
{
    int i, ch;

    for (ch = 0; ch < channels; ch++) {
        for (i = 0; i < nb_samples; i++) {
            /* shift in unsigned arithmetic: left-shifting a negative signed
             * value is undefined behaviour in C */
            uint32_t shifted = (uint32_t)buffer[ch][i] << extra_bits;

            buffer[ch][i] = (int32_t)(shifted | (uint32_t)extra_bits_buffer[ch][i]);
        }
    }
}
256 
/**
 * Decode one syntax element from alac->gb into the output frame.
 *
 * @param avctx    codec context; its priv_data is the ALACContext
 * @param data     not referenced in the visible body
 * @param ch_index index of the first output channel of this element
 * @param channels number of channels carried by this element
 * @return 0 on success, a negative value on error
 *
 * NOTE(review): listing lines 353, 360, 380 and 385 are missing from this
 * listing; each one appears to be the first line of a function call whose
 * remaining arguments are still visible below.
 */
257 static int decode_element(AVCodecContext *avctx, void *data, int ch_index,
258  int channels)
259 {
260  ALACContext *alac = avctx->priv_data;
261  int has_size, bps, is_compressed, decorr_shift, decorr_left_weight, ret;
262  uint32_t output_samples;
263  int i, ch;
264 
265  skip_bits(&alac->gb, 4); /* element instance tag */
266  skip_bits(&alac->gb, 12); /* unused header bits */
267 
268  /* the number of output samples is stored in the frame */
269  has_size = get_bits1(&alac->gb);
270 
/* 2-bit field scaled by 8, i.e. 0, 8, 16 or 24 extra bits */
271  alac->extra_bits = get_bits(&alac->gb, 2) << 3;
272  bps = alac->sample_size - alac->extra_bits + channels - 1;
/* the unsigned comparison also rejects negative bps values */
273  if (bps > 32U) {
274  av_log(avctx, AV_LOG_ERROR, "bps is unsupported: %d\n", bps);
275  return AVERROR_PATCHWELCOME;
276  }
277 
278  /* whether the frame is compressed */
279  is_compressed = !get_bits1(&alac->gb);
280 
281  if (has_size)
282  output_samples = get_bits_long(&alac->gb, 32);
283  else
284  output_samples = alac->max_samples_per_frame;
285  if (!output_samples || output_samples > alac->max_samples_per_frame) {
/* NOTE(review): output_samples is uint32_t but is printed with %d here */
286  av_log(avctx, AV_LOG_ERROR, "invalid samples per frame: %d\n",
287  output_samples);
288  return AVERROR_INVALIDDATA;
289  }
/* nb_samples == 0 marks the first element of a packet: allocate the frame
 * once, then require every later element to have the same sample count */
290  if (!alac->nb_samples) {
291  /* get output buffer */
292  alac->frame.nb_samples = output_samples;
293  if ((ret = ff_get_buffer(avctx, &alac->frame)) < 0) {
294  av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
295  return ret;
296  }
297  } else if (output_samples != alac->nb_samples) {
298  av_log(avctx, AV_LOG_ERROR, "sample count mismatch: %u != %d\n",
299  output_samples, alac->nb_samples);
300  return AVERROR_INVALIDDATA;
301  }
302  alac->nb_samples = output_samples;
/* with direct output the work buffers are the frame's own channel planes */
303  if (alac->direct_output) {
304  for (ch = 0; ch < channels; ch++)
305  alac->output_samples_buffer[ch] = (int32_t *)alac->frame.extended_data[ch_index + ch];
306  }
307 
308  if (is_compressed) {
309  int16_t lpc_coefs[2][32];
310  int lpc_order[2];
311  int prediction_type[2];
312  int lpc_quant[2];
313  int rice_history_mult[2];
314 
315  decorr_shift = get_bits(&alac->gb, 8);
316  decorr_left_weight = get_bits(&alac->gb, 8);
317 
318  for (ch = 0; ch < channels; ch++) {
319  prediction_type[ch] = get_bits(&alac->gb, 4);
320  lpc_quant[ch] = get_bits(&alac->gb, 4);
321  rice_history_mult[ch] = get_bits(&alac->gb, 3);
322  lpc_order[ch] = get_bits(&alac->gb, 5);
323 
324  /* read the predictor table */
/* coefficients are stored in reverse order in the bitstream */
325  for (i = lpc_order[ch] - 1; i >= 0; i--)
326  lpc_coefs[ch][i] = get_sbits(&alac->gb, 16);
327  }
328 
/* the extra low-order bits are stored interleaved per sample, ahead of the
 * Rice-coded residuals */
329  if (alac->extra_bits) {
330  for (i = 0; i < alac->nb_samples; i++) {
/* NOTE(review): bare -1 here, while other error paths use AVERROR codes */
331  if(get_bits_left(&alac->gb) <= 0)
332  return -1;
333  for (ch = 0; ch < channels; ch++)
334  alac->extra_bits_buffer[ch][i] = get_bits(&alac->gb, alac->extra_bits);
335  }
336  }
337  for (ch = 0; ch < channels; ch++) {
/* this inner `ret` shadows the function-level `ret` */
338  int ret=rice_decompress(alac, alac->predict_error_buffer[ch],
339  alac->nb_samples, bps,
340  rice_history_mult[ch] * alac->rice_history_mult / 4);
341  if(ret<0)
342  return ret;
343 
344  /* adaptive FIR filter */
345  if (prediction_type[ch] == 15) {
346  /* Prediction type 15 runs the adaptive FIR twice.
347  * The first pass uses the special-case coef_num = 31, while
348  * the second pass uses the coefs from the bitstream.
349  *
350  * However, this prediction type is not currently used by the
351  * reference encoder.
352  */
/* NOTE(review): the start of this call (listing line 353) is missing */
354  alac->predict_error_buffer[ch],
355  alac->nb_samples, bps, NULL, 31, 0);
356  } else if (prediction_type[ch] > 0) {
357  av_log(avctx, AV_LOG_WARNING, "unknown prediction type: %i\n",
358  prediction_type[ch]);
359  }
/* NOTE(review): the start of this call (listing line 360) is missing */
361  alac->output_samples_buffer[ch], alac->nb_samples,
362  bps, lpc_coefs[ch], lpc_order[ch], lpc_quant[ch]);
363  }
364  } else {
365  /* not compressed, easy case */
366  for (i = 0; i < alac->nb_samples; i++) {
367  if(get_bits_left(&alac->gb) <= 0)
368  return -1;
369  for (ch = 0; ch < channels; ch++) {
370  alac->output_samples_buffer[ch][i] =
371  get_sbits_long(&alac->gb, alac->sample_size);
372  }
373  }
/* clear the values that gate the post-processing steps below */
374  alac->extra_bits = 0;
375  decorr_shift = 0;
376  decorr_left_weight = 0;
377  }
378 
379  if (channels == 2 && decorr_left_weight) {
/* NOTE(review): the start of this call (listing line 380) is missing */
381  decorr_shift, decorr_left_weight);
382  }
383 
384  if (alac->extra_bits) {
/* NOTE(review): the start of this call (listing line 385) is missing */
386  alac->extra_bits, channels, alac->nb_samples);
387  }
388 
/* Write the samples to the frame. Planar formats: 16-bit is copied into
 * int16_t planes, 24-bit is shifted in place by 8 bits, and other sizes need
 * no further work here. Packed formats: samples are interleaved starting at
 * channel ch_index, skipping the channels of other elements. */
389  if(av_sample_fmt_is_planar(avctx->sample_fmt)) {
390  switch(alac->sample_size) {
391  case 16: {
392  for (ch = 0; ch < channels; ch++) {
393  int16_t *outbuffer = (int16_t *)alac->frame.extended_data[ch_index + ch];
394  for (i = 0; i < alac->nb_samples; i++)
395  *outbuffer++ = alac->output_samples_buffer[ch][i];
396  }}
397  break;
398  case 24: {
399  for (ch = 0; ch < channels; ch++) {
400  for (i = 0; i < alac->nb_samples; i++)
401  alac->output_samples_buffer[ch][i] <<= 8;
402  }}
403  break;
404  }
405  }else{
406  switch(alac->sample_size) {
407  case 16: {
408  int16_t *outbuffer = ((int16_t *)alac->frame.extended_data[0]) + ch_index;
409  for (i = 0; i < alac->nb_samples; i++) {
410  for (ch = 0; ch < channels; ch++)
411  *outbuffer++ = alac->output_samples_buffer[ch][i];
/* advance past the channels that belong to other elements */
412  outbuffer += alac->channels - channels;
413  }
414  }
415  break;
416  case 24: {
417  int32_t *outbuffer = ((int32_t *)alac->frame.extended_data[0]) + ch_index;
418  for (i = 0; i < alac->nb_samples; i++) {
419  for (ch = 0; ch < channels; ch++)
420  *outbuffer++ = alac->output_samples_buffer[ch][i] << 8;
421  outbuffer += alac->channels - channels;
422  }
423  }
424  break;
425  case 32: {
426  int32_t *outbuffer = ((int32_t *)alac->frame.extended_data[0]) + ch_index;
427  for (i = 0; i < alac->nb_samples; i++) {
428  for (ch = 0; ch < channels; ch++)
429  *outbuffer++ = alac->output_samples_buffer[ch][i];
430  outbuffer += alac->channels - channels;
431  }
432  }
433  break;
434  }
435  }
436 
437  return 0;
438 }
439 
/**
 * Decode one packet into an audio frame.
 *
 * Reads 3-bit element tags until TYPE_END is found and calls
 * decode_element() for each element in between.
 *
 * @param avctx         codec context; its priv_data is the ALACContext
 * @param data          output; receives a copy of alac->frame on success
 * @param got_frame_ptr set to 1 when a frame was produced
 * @param avpkt         input packet
 * @return avpkt->size on success, a negative error code on failure
 *
 * NOTE(review): listing line 473 (the third argument of the decode_element()
 * call) is missing from this listing.
 */
440 static int alac_decode_frame(AVCodecContext *avctx, void *data,
441  int *got_frame_ptr, AVPacket *avpkt)
442 {
443  ALACContext *alac = avctx->priv_data;
444  enum AlacRawDataBlockType element;
445  int channels;
446  int ch, ret, got_end;
447 
448  init_get_bits(&alac->gb, avpkt->data, avpkt->size * 8);
449 
450  got_end = 0;
/* nb_samples == 0 tells decode_element() that no output buffer exists yet */
451  alac->nb_samples = 0;
452  ch = 0;
453  while (get_bits_left(&alac->gb) >= 3) {
454  element = get_bits(&alac->gb, 3);
455  if (element == TYPE_END) {
456  got_end = 1;
457  break;
458  }
/* only element types up to TYPE_CPE, plus TYPE_LFE, are handled */
459  if (element > TYPE_CPE && element != TYPE_LFE) {
460  av_log(avctx, AV_LOG_ERROR, "syntax element unsupported: %d\n", element);
461  return AVERROR_PATCHWELCOME;
462  }
463 
464  channels = (element == TYPE_CPE) ? 2 : 1;
/* reject elements that exceed the stream's channel count, either by running
 * total or by their mapped output position */
465  if ( ch + channels > alac->channels
466  || ff_alac_channel_layout_offsets[alac->channels - 1][ch] + channels > alac->channels
467  ) {
468  av_log(avctx, AV_LOG_ERROR, "invalid element channel count\n");
469  return AVERROR_INVALIDDATA;
470  }
471 
472  ret = decode_element(avctx, data,
474  channels);
/* an element error is only propagated when get_bits_left() is non-zero */
475  if (ret < 0 && get_bits_left(&alac->gb))
476  return ret;
477 
478  ch += channels;
479  }
480  if (!got_end) {
481  av_log(avctx, AV_LOG_ERROR, "no end tag found. incomplete packet.\n");
482  return AVERROR_INVALIDDATA;
483  }
484 
/* more than 8 unread bits is logged but does not fail the decode */
485  if (avpkt->size * 8 - get_bits_count(&alac->gb) > 8) {
486  av_log(avctx, AV_LOG_ERROR, "Error : %d bits left\n",
487  avpkt->size * 8 - get_bits_count(&alac->gb));
488  }
489 
490  *got_frame_ptr = 1;
491  *(AVFrame *)data = alac->frame;
492 
493  return avpkt->size;
494 }
495 
497 {
/*
 * NOTE(review): the signature line of this function (listing line 496) is
 * missing from this listing; the body uses `avctx`, presumably its parameter.
 *
 * Frees the per-channel work buffers and always returns 0.
 */
498  ALACContext *alac = avctx->priv_data;
499 
500  int ch;
/* at most two channels' worth of work buffers are ever allocated */
501  for (ch = 0; ch < FFMIN(alac->channels, 2); ch++) {
502  av_freep(&alac->predict_error_buffer[ch]);
/* with direct_output set, output_samples_buffer is not freed here:
 * decode_element() points it at the frame's own data in that mode */
503  if (!alac->direct_output)
504  av_freep(&alac->output_samples_buffer[ch]);
505  av_freep(&alac->extra_bits_buffer[ch]);
506  }
507 
508  return 0;
509 }
510 
/**
 * Allocate the per-channel work buffers, sized for max_samples_per_frame
 * 32-bit samples each.
 *
 * @param alac decoder context; max_samples_per_frame, channels, sample_size
 *             and avctx must already be set
 * @return 0 on success, AVERROR(ENOMEM) on failure (after calling
 *         alac_decode_close() to release what was allocated)
 *
 * NOTE(review): listing lines 521 and 526 (the first lines of two
 * FF_ALLOC_OR_GOTO invocations) are missing from this listing.
 */
511 static int allocate_buffers(ALACContext *alac)
512 {
513  int ch;
514  int buf_size;
515 
/* make sure the byte size computed below fits in an int */
516  if (alac->max_samples_per_frame > INT_MAX / sizeof(int32_t))
517  goto buf_alloc_fail;
518  buf_size = alac->max_samples_per_frame * sizeof(int32_t);
519 
520  for (ch = 0; ch < FFMIN(alac->channels, 2); ch++) {
522  buf_size, buf_alloc_fail);
523 
/* direct output: samples wider than 16 bits in a planar format; no separate
 * output buffer is allocated in that case */
524  alac->direct_output = alac->sample_size > 16 && av_sample_fmt_is_planar(alac->avctx->sample_fmt);
525  if (!alac->direct_output) {
527  buf_size, buf_alloc_fail);
528  }
529 
530  FF_ALLOC_OR_GOTO(alac->avctx, alac->extra_bits_buffer[ch],
531  buf_size, buf_alloc_fail);
532  }
533  return 0;
534 buf_alloc_fail:
535  alac_decode_close(alac->avctx);
536  return AVERROR(ENOMEM);
537 }
538 
539 static int alac_set_info(ALACContext *alac)
540 {
541  GetByteContext gb;
542 
543  bytestream2_init(&gb, alac->avctx->extradata,
544  alac->avctx->extradata_size);
545 
546  bytestream2_skipu(&gb, 12); // size:4, alac:4, version:4
547 
548  alac->max_samples_per_frame = bytestream2_get_be32u(&gb);
549  if (!alac->max_samples_per_frame || alac->max_samples_per_frame > INT_MAX) {
550  av_log(alac->avctx, AV_LOG_ERROR, "max samples per frame invalid: %u\n",
551  alac->max_samples_per_frame);
552  return AVERROR_INVALIDDATA;
553  }
554  bytestream2_skipu(&gb, 1); // compatible version
555  alac->sample_size = bytestream2_get_byteu(&gb);
556  alac->rice_history_mult = bytestream2_get_byteu(&gb);
557  alac->rice_initial_history = bytestream2_get_byteu(&gb);
558  alac->rice_limit = bytestream2_get_byteu(&gb);
559  alac->channels = bytestream2_get_byteu(&gb);
560  bytestream2_get_be16u(&gb); // maxRun
561  bytestream2_get_be32u(&gb); // max coded frame size
562  bytestream2_get_be32u(&gb); // average bitrate
563  bytestream2_get_be32u(&gb); // samplerate
564 
565  return 0;
566 }
567 
569 {
/*
 * NOTE(review): the signature line (listing line 568) and listing lines 576,
 * 585 and 619 are missing from this listing. The body uses `avctx`,
 * presumably its parameter; `req_packed` is read below but its assignment is
 * not visible, and the condition that opens the "extradata is too small"
 * block is not visible either.
 *
 * Visible steps: parse the extradata, choose the sample format from the
 * sample size, settle the channel count and layout, allocate work buffers.
 */
570  int ret;
571  int req_packed;
572  ALACContext *alac = avctx->priv_data;
573  alac->avctx = avctx;
574 
575  /* initialize from the extradata */
577  av_log(avctx, AV_LOG_ERROR, "extradata is too small\n");
578  return AVERROR_INVALIDDATA;
579  }
/* NOTE(review): returns a bare -1 instead of alac_set_info()'s error code */
580  if (alac_set_info(alac)) {
581  av_log(avctx, AV_LOG_ERROR, "set_info failed\n");
582  return -1;
583  }
584 
/* 16-bit streams use S16/S16P; 24- and 32-bit streams use S32/S32P */
586  switch (alac->sample_size) {
587  case 16: avctx->sample_fmt = req_packed ? AV_SAMPLE_FMT_S16 : AV_SAMPLE_FMT_S16P;
588  break;
589  case 24:
590  case 32: avctx->sample_fmt = req_packed ? AV_SAMPLE_FMT_S32 : AV_SAMPLE_FMT_S32P;
591  break;
592  default: av_log_ask_for_sample(avctx, "Sample depth %d is not supported.\n",
593  alac->sample_size);
594  return AVERROR_PATCHWELCOME;
595  }
596  avctx->bits_per_raw_sample = alac->sample_size;
597 
/* a channel count from the extradata in 1..ALAC_MAX_CHANNELS overrides
 * avctx->channels; otherwise avctx->channels is used instead */
598  if (alac->channels < 1) {
599  av_log(avctx, AV_LOG_WARNING, "Invalid channel count\n");
600  alac->channels = avctx->channels;
601  } else {
602  if (alac->channels > ALAC_MAX_CHANNELS)
603  alac->channels = avctx->channels;
604  else
605  avctx->channels = alac->channels;
606  }
607  if (avctx->channels > ALAC_MAX_CHANNELS || avctx->channels <= 0 ) {
608  av_log(avctx, AV_LOG_ERROR, "Unsupported channel count: %d\n",
609  avctx->channels);
610  return AVERROR_PATCHWELCOME;
611  }
612  avctx->channel_layout = ff_alac_channel_layouts[alac->channels - 1];
613 
614  if ((ret = allocate_buffers(alac)) < 0) {
615  av_log(avctx, AV_LOG_ERROR, "Error allocating buffers\n");
616  return ret;
617  }
618 
620  avctx->coded_frame = &alac->frame;
621 
622  return 0;
623 }
624 
/*
 * Codec registration entry for the ALAC decoder.
 * NOTE(review): the opening line of this initializer (listing line 625) and
 * listing lines 630-632 are missing from this listing; presumably they hold
 * the variable declaration and the callback members - confirm against the
 * original file.
 */
626  .name = "alac",
627  .type = AVMEDIA_TYPE_AUDIO,
628  .id = AV_CODEC_ID_ALAC,
/* size of the per-instance private context */
629  .priv_data_size = sizeof(ALACContext),
633  .capabilities = CODEC_CAP_DR1,
634  .long_name = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),
635 };