FFmpeg
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
alacenc.c
Go to the documentation of this file.
1 /*
2  * ALAC audio encoder
3  * Copyright (c) 2008 Jaikrishnan Menon <realityman@gmx.net>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include "avcodec.h"
23 #include "put_bits.h"
24 #include "dsputil.h"
25 #include "internal.h"
26 #include "lpc.h"
27 #include "mathops.h"
28 #include "alac_data.h"
29 
/* Samples per channel in a full frame. Shorter frames carry an explicit
 * 32-bit sample count in their header (see get_max_frame_size()). */
30 #define DEFAULT_FRAME_SIZE 4096
/* Value stored in the first 32-bit word of the extradata by the init code. */
31 #define ALAC_EXTRADATA_SIZE 36
/* NOTE(review): the next two are not referenced in the visible code. 55 equals
 * the 23 + 32 header bits and 3 the trailing bits that get_max_frame_size()
 * adds, so these are presumably bit counts -- confirm. */
32 #define ALAC_FRAME_HEADER_SIZE 55
33 #define ALAC_FRAME_FOOTER_SIZE 3
34 
/* Nine set bits; presumably the escape code that the comment in
 * encode_scalar() refers to -- confirm. */
35 #define ALAC_ESCAPE_CODE 0x1FF
/* Prediction-order limits and defaults.
 * NOTE(review): the lines that would use these in the init code are missing
 * from this listing. */
36 #define ALAC_MAX_LPC_ORDER 30
37 #define DEFAULT_MAX_PRED_ORDER 6
38 #define DEFAULT_MIN_PRED_ORDER 4
/* Passed to ff_lpc_calc_coefs() by calc_predictor_params(). */
39 #define ALAC_MAX_LPC_PRECISION 9
40 #define ALAC_MAX_LPC_SHIFT 9
41 
/* Stereo decorrelation modes. The values match the score indices computed by
 * estimate_stereo_mode(): 0 = left+right, 1 = left+side, 2 = right+side,
 * 3 = mid+side. */
42 #define ALAC_CHMODE_LEFT_RIGHT 0
43 #define ALAC_CHMODE_LEFT_SIDE 1
44 #define ALAC_CHMODE_RIGHT_SIDE 2
45 #define ALAC_CHMODE_MID_SIDE 3
46 
/**
 * Parameters of the adaptive Rice coder.
 *
 * The member list was missing from this listing; the four members below are
 * the ones the rest of the file accesses through s->rc.
 */
typedef struct RiceContext {
    int history_mult;    /**< multiplier used when updating the running history in the entropy coder */
    int initial_history; /**< history value the entropy coder starts each frame with */
    int k_modifier;      /**< upper limit applied to the Rice parameter k in encode_scalar() */
    int rice_modifier;   /**< written as a 3-bit field in every channel's predictor header */
} RiceContext;
53 
54 typedef struct AlacLPCContext {
55  int lpc_order;
57  int lpc_quant;
59 
/**
 * Private state of the ALAC encoder.
 *
 * NOTE(review): this listing is truncated after the first two members; the
 * remaining members and the closing "} AlacEncodeContext;" are missing.
 * Other code in the file accesses at least compression_level,
 * max_coded_frame_size, write_sample_size, extra_bits, sample_buf,
 * predictor_buf, interlacing_shift, interlacing_leftweight, pbctx, rc, lpc,
 * lpc_ctx and avctx through this struct. Restore the full definition from
 * the upstream file before building.
 */
60 typedef struct AlacEncodeContext {
61  int frame_size; /**< number of samples per channel in the frame being encoded */
62  int verbatim; /**< non-zero when the current frame is written uncompressed */
79 
80 
81 static void init_sample_buffers(AlacEncodeContext *s, int channels,
82  uint8_t const *samples[2])
83 {
84  int ch, i;
87 
88 #define COPY_SAMPLES(type) do { \
89  for (ch = 0; ch < channels; ch++) { \
90  int32_t *bptr = s->sample_buf[ch]; \
91  const type *sptr = (const type *)samples[ch]; \
92  for (i = 0; i < s->frame_size; i++) \
93  bptr[i] = sptr[i] >> shift; \
94  } \
95  } while (0)
96 
99  else
100  COPY_SAMPLES(int16_t);
101 }
102 
103 static void encode_scalar(AlacEncodeContext *s, int x,
104  int k, int write_sample_size)
105 {
106  int divisor, q, r;
107 
108  k = FFMIN(k, s->rc.k_modifier);
109  divisor = (1<<k) - 1;
110  q = x / divisor;
111  r = x % divisor;
112 
113  if (q > 8) {
114  // write escape code and sample value directly
116  put_bits(&s->pbctx, write_sample_size, x);
117  } else {
118  if (q)
119  put_bits(&s->pbctx, q, (1<<q) - 1);
120  put_bits(&s->pbctx, 1, 0);
121 
122  if (k != 1) {
123  if (r > 0)
124  put_bits(&s->pbctx, k, r+1);
125  else
126  put_bits(&s->pbctx, k-1, 0);
127  }
128  }
129 }
130 
132  enum AlacRawDataBlockType element,
133  int instance)
134 {
135  int encode_fs = 0;
136 
138  encode_fs = 1;
139 
140  put_bits(&s->pbctx, 3, element); // element type
141  put_bits(&s->pbctx, 4, instance); // element instance
142  put_bits(&s->pbctx, 12, 0); // unused header bits
143  put_bits(&s->pbctx, 1, encode_fs); // Sample count is in the header
144  put_bits(&s->pbctx, 2, s->extra_bits >> 3); // Extra bytes (for 24-bit)
145  put_bits(&s->pbctx, 1, s->verbatim); // Audio block is verbatim
146  if (encode_fs)
147  put_bits32(&s->pbctx, s->frame_size); // No. of samples in the frame
148 }
149 
/*
 * Choose the LPC parameters (order, quantisation shift and coefficients) for
 * channel `ch` and store them in s->lpc[ch].
 *
 * Compression level 1 uses a fixed 6th-order coefficient set; every other
 * level takes the parameters from ff_lpc_calc_coefs().
 *
 * NOTE(review): several lines of this function are missing from this
 * listing: the signature (write_element() calls it as
 * calc_predictor_params(s, i)), the declaration of `coefs`, and parts of the
 * ff_lpc_calc_coefs() argument list including its closing parenthesis.
 * Restore them from the upstream file before building.
 */
151 {
153  int shift[MAX_LPC_ORDER];
154  int opt_order;
155 
156  if (s->compression_level == 1) {
 // fixed predictor: order 6, shift 6, constant coefficients
157  s->lpc[ch].lpc_order = 6;
158  s->lpc[ch].lpc_quant = 6;
159  s->lpc[ch].lpc_coeff[0] = 160;
160  s->lpc[ch].lpc_coeff[1] = -190;
161  s->lpc[ch].lpc_coeff[2] = 170;
162  s->lpc[ch].lpc_coeff[3] = -130;
163  s->lpc[ch].lpc_coeff[4] = 80;
164  s->lpc[ch].lpc_coeff[5] = -25;
165  } else {
 // NOTE(review): arguments between frame_size and the precision, and after
 // `shift`, are missing here
166  opt_order = ff_lpc_calc_coefs(&s->lpc_ctx, s->sample_buf[ch],
167  s->frame_size,
170  ALAC_MAX_LPC_PRECISION, coefs, shift,
173 
 // coefs[] and shift[] are indexed by order - 1
174  s->lpc[ch].lpc_order = opt_order;
175  s->lpc[ch].lpc_quant = shift[opt_order-1];
176  memcpy(s->lpc[ch].lpc_coeff, coefs[opt_order-1], opt_order*sizeof(int));
177  }
178 }
179 
/**
 * Estimate which stereo representation is cheapest to code.
 *
 * For every sample from index 2 onwards the second-order difference of each
 * channel is formed; the absolute values of the left, right, halved-sum (mid)
 * and difference (side) signals are accumulated and combined pairwise into
 * one score per mode.
 *
 * @param left_ch  left channel samples
 * @param right_ch right channel samples
 * @param n        number of samples per channel
 * @return index of the lowest score: 0 = left+right, 1 = left+side,
 *         2 = right+side, 3 = mid+side; the lowest index wins a tie
 */
static int estimate_stereo_mode(int32_t *left_ch, int32_t *right_ch, int n)
{
    uint64_t sum[4] = { 0, 0, 0, 0 };   /* left, right, mid, side */
    uint64_t score[4];
    int pos, mode, best = 0;

    /* accumulate magnitudes of the 2nd order residuals */
    for (pos = 2; pos < n; pos++) {
        int32_t lt = left_ch[pos]  - 2 * left_ch[pos - 1]  + left_ch[pos - 2];
        int32_t rt = right_ch[pos] - 2 * right_ch[pos - 1] + right_ch[pos - 2];

        sum[0] += FFABS(lt);
        sum[1] += FFABS(rt);
        sum[2] += FFABS((lt + rt) >> 1);
        sum[3] += FFABS(lt - rt);
    }

    /* one score per candidate mode */
    score[0] = sum[0] + sum[1];
    score[1] = sum[0] + sum[3];
    score[2] = sum[1] + sum[3];
    score[3] = sum[2] + sum[3];

    /* strict comparison keeps the earliest mode on equal scores */
    for (mode = 1; mode < 4; mode++)
        if (score[mode] < score[best])
            best = mode;

    return best;
}
212 
214 {
215  int32_t *left = s->sample_buf[0], *right = s->sample_buf[1];
216  int i, mode, n = s->frame_size;
217  int32_t tmp;
218 
219  mode = estimate_stereo_mode(left, right, n);
220 
221  switch (mode) {
223  s->interlacing_leftweight = 0;
224  s->interlacing_shift = 0;
225  break;
227  for (i = 0; i < n; i++)
228  right[i] = left[i] - right[i];
229  s->interlacing_leftweight = 1;
230  s->interlacing_shift = 0;
231  break;
233  for (i = 0; i < n; i++) {
234  tmp = right[i];
235  right[i] = left[i] - right[i];
236  left[i] = tmp + (right[i] >> 31);
237  }
238  s->interlacing_leftweight = 1;
239  s->interlacing_shift = 31;
240  break;
241  default:
242  for (i = 0; i < n; i++) {
243  tmp = left[i];
244  left[i] = (tmp + right[i]) >> 1;
245  right[i] = tmp - right[i];
246  }
247  s->interlacing_leftweight = 1;
248  s->interlacing_shift = 1;
249  break;
250  }
251 }
252 
254 {
255  int i;
256  AlacLPCContext lpc = s->lpc[ch];
257 
258  if (lpc.lpc_order == 31) {
259  s->predictor_buf[0] = s->sample_buf[ch][0];
260 
261  for (i = 1; i < s->frame_size; i++) {
262  s->predictor_buf[i] = s->sample_buf[ch][i ] -
263  s->sample_buf[ch][i - 1];
264  }
265 
266  return;
267  }
268 
269  // generalised linear predictor
270 
271  if (lpc.lpc_order > 0) {
272  int32_t *samples = s->sample_buf[ch];
273  int32_t *residual = s->predictor_buf;
274 
275  // generate warm-up samples
276  residual[0] = samples[0];
277  for (i = 1; i <= lpc.lpc_order; i++)
278  residual[i] = samples[i] - samples[i-1];
279 
280  // perform lpc on remaining samples
281  for (i = lpc.lpc_order + 1; i < s->frame_size; i++) {
282  int sum = 1 << (lpc.lpc_quant - 1), res_val, j;
283 
284  for (j = 0; j < lpc.lpc_order; j++) {
285  sum += (samples[lpc.lpc_order-j] - samples[0]) *
286  lpc.lpc_coeff[j];
287  }
288 
289  sum >>= lpc.lpc_quant;
290  sum += samples[0];
291  residual[i] = sign_extend(samples[lpc.lpc_order+1] - sum,
292  s->write_sample_size);
293  res_val = residual[i];
294 
295  if (res_val) {
296  int index = lpc.lpc_order - 1;
297  int neg = (res_val < 0);
298 
299  while (index >= 0 && (neg ? (res_val < 0) : (res_val > 0))) {
300  int val = samples[0] - samples[lpc.lpc_order - index];
301  int sign = (val ? FFSIGN(val) : 0);
302 
303  if (neg)
304  sign *= -1;
305 
306  lpc.lpc_coeff[index] -= sign;
307  val *= sign;
308  res_val -= (val >> lpc.lpc_quant) * (lpc.lpc_order - index);
309  index--;
310  }
311  }
312  samples++;
313  }
314  }
315 }
316 
318 {
319  unsigned int history = s->rc.initial_history;
320  int sign_modifier = 0, i, k;
322 
323  for (i = 0; i < s->frame_size;) {
324  int x;
325 
326  k = av_log2((history >> 9) + 3);
327 
328  x = -2 * (*samples) -1;
329  x ^= x >> 31;
330 
331  samples++;
332  i++;
333 
334  encode_scalar(s, x - sign_modifier, k, s->write_sample_size);
335 
336  history += x * s->rc.history_mult -
337  ((history * s->rc.history_mult) >> 9);
338 
339  sign_modifier = 0;
340  if (x > 0xFFFF)
341  history = 0xFFFF;
342 
343  if (history < 128 && i < s->frame_size) {
344  unsigned int block_size = 0;
345 
346  k = 7 - av_log2(history) + ((history + 16) >> 6);
347 
348  while (*samples == 0 && i < s->frame_size) {
349  samples++;
350  i++;
351  block_size++;
352  }
353  encode_scalar(s, block_size, k, 16);
354  sign_modifier = (block_size <= 0xFFFF);
355  history = 0;
356  }
357 
358  }
359 }
360 
/*
 * Write one channel element (one or two channels) to the bitstream.
 *
 * In verbatim mode the element header is followed by the raw samples,
 * interleaved by channel. Otherwise the samples are copied into the work
 * buffers, the header, mixing parameters and per-channel predictor
 * parameters are written, the low extra bits (if any) are written and
 * stripped, and each channel is run through the linear predictor.
 *
 * NOTE(review): several lines are missing from this listing: the first line
 * of the signature (write_frame() calls this as write_element(s, type,
 * instance, plane0, plane1)), the call that writes each verbatim sample,
 * the left-hand side of the statement ending in "channels - 1;", the
 * statements belonging to "if (channels == 2)" and its "else", and the
 * statement after the 2nd-pass filter at the end of the per-channel loop.
 * Restore them from the upstream file before building.
 */
362  enum AlacRawDataBlockType element, int instance,
363  const uint8_t *samples0, const uint8_t *samples1)
364 {
365  uint8_t const *samples[2] = { samples0, samples1 };
366  int i, j, channels;
367  int prediction_type = 0;
368  PutBitContext *pb = &s->pbctx;
369 
 // a channel pair element carries two channels, anything else one
370  channels = element == TYPE_CPE ? 2 : 1;
371 
372  if (s->verbatim) {
373  write_element_header(s, element, instance);
374  /* samples are channel-interleaved in verbatim mode */
375  if (s->avctx->sample_fmt == AV_SAMPLE_FMT_S32P) {
376  int shift = 32 - s->avctx->bits_per_raw_sample;
377  int32_t const *samples_s32[2] = { (const int32_t *)samples0,
378  (const int32_t *)samples1 };
379  for (i = 0; i < s->frame_size; i++)
380  for (j = 0; j < channels; j++)
 // NOTE(review): start of the write call is missing here
382  samples_s32[j][i] >> shift);
383  } else {
384  int16_t const *samples_s16[2] = { (const int16_t *)samples0,
385  (const int16_t *)samples1 };
386  for (i = 0; i < s->frame_size; i++)
387  for (j = 0; j < channels; j++)
 // NOTE(review): start of the write call is missing here
389  samples_s16[j][i]);
390  }
391  } else {
 // NOTE(review): left-hand side of this statement is missing
393  channels - 1;
394 
395  init_sample_buffers(s, channels, samples);
396  write_element_header(s, element, instance);
397 
 // NOTE(review): both branch bodies are missing
398  if (channels == 2)
400  else
402  put_bits(pb, 8, s->interlacing_shift);
403  put_bits(pb, 8, s->interlacing_leftweight);
404 
 // per-channel predictor header: 4 + 4 + 3 + 5 bits, then the coefficients
405  for (i = 0; i < channels; i++) {
406  calc_predictor_params(s, i);
407 
408  put_bits(pb, 4, prediction_type);
409  put_bits(pb, 4, s->lpc[i].lpc_quant);
410 
411  put_bits(pb, 3, s->rc.rice_modifier);
412  put_bits(pb, 5, s->lpc[i].lpc_order);
413  // predictor coeff. table
414  for (j = 0; j < s->lpc[i].lpc_order; j++)
415  put_sbits(pb, 16, s->lpc[i].lpc_coeff[j]);
416  }
417 
418  // write extra bits if needed
419  if (s->extra_bits) {
420  uint32_t mask = (1 << s->extra_bits) - 1;
421  for (i = 0; i < s->frame_size; i++) {
422  for (j = 0; j < channels; j++) {
 // low bits go out raw; the buffer keeps only the high part
423  put_bits(pb, s->extra_bits, s->sample_buf[j][i] & mask);
424  s->sample_buf[j][i] >>= s->extra_bits;
425  }
426  }
427  }
428 
429  // apply lpc and entropy coding to audio samples
430  for (i = 0; i < channels; i++) {
431  alac_linear_predictor(s, i);
432 
433  // TODO: determine when this will actually help. for now it's not used.
434  if (prediction_type == 15) {
435  // 2nd pass 1st order filter
436  for (j = s->frame_size - 1; j > 0; j--)
437  s->predictor_buf[j] -= s->predictor_buf[j - 1];
438  }
 // NOTE(review): a statement is missing here
440  }
441  }
442 }
443 
444 static int write_frame(AlacEncodeContext *s, AVPacket *avpkt,
445  uint8_t * const *samples)
446 {
447  PutBitContext *pb = &s->pbctx;
448  const enum AlacRawDataBlockType *ch_elements = ff_alac_channel_elements[s->avctx->channels - 1];
449  const uint8_t *ch_map = ff_alac_channel_layout_offsets[s->avctx->channels - 1];
450  int ch, element, sce, cpe;
451 
452  init_put_bits(pb, avpkt->data, avpkt->size);
453 
454  ch = element = sce = cpe = 0;
455  while (ch < s->avctx->channels) {
456  if (ch_elements[element] == TYPE_CPE) {
457  write_element(s, TYPE_CPE, cpe, samples[ch_map[ch]],
458  samples[ch_map[ch + 1]]);
459  cpe++;
460  ch += 2;
461  } else {
462  write_element(s, TYPE_SCE, sce, samples[ch_map[ch]], NULL);
463  sce++;
464  ch++;
465  }
466  element++;
467  }
468 
469  put_bits(pb, 3, TYPE_END);
470  flush_put_bits(pb);
471 
472  return put_bits_count(pb) >> 3;
473 }
474 
475 static av_always_inline int get_max_frame_size(int frame_size, int ch, int bps)
476 {
477  int header_bits = 23 + 32 * (frame_size < DEFAULT_FRAME_SIZE);
478  return FFALIGN(header_bits + bps * ch * frame_size + 3, 8) / 8;
479 }
480 
482 {
483  AlacEncodeContext *s = avctx->priv_data;
484  ff_lpc_end(&s->lpc_ctx);
485  av_freep(&avctx->extradata);
486  avctx->extradata_size = 0;
487  av_freep(&avctx->coded_frame);
488  return 0;
489 }
490 
/*
 * Encoder initialisation: fixes the sample depth, picks the compression
 * level and Rice parameters, fills the 36-byte extradata block, validates
 * the prediction orders and sets up the coded frame and LPC state. Any
 * failure jumps to `error`, which calls alac_encode_close() and returns the
 * error code.
 *
 * NOTE(review): many lines are missing from this listing: the signature
 * (the body uses an AVCodecContext named avctx), the statement after the
 * declarations, the "if" paired with the compression-level "else", the start
 * of the statement ending in "avctx->bits_per_raw_sample);", the extradata
 * allocation and the statement after its check, the second half of both
 * prediction-order range checks and the statements around them, the head of
 * the min/max consistency check and its log arguments, and one argument of
 * ff_lpc_init(). Restore them from the upstream file before building.
 */
492 {
493  AlacEncodeContext *s = avctx->priv_data;
494  int ret;
495  uint8_t *alac_extradata;
496 
498 
 // 32-bit planar input is always coded as 24 bits, everything else as 16
499  if (avctx->sample_fmt == AV_SAMPLE_FMT_S32P) {
500  if (avctx->bits_per_raw_sample != 24)
501  av_log(avctx, AV_LOG_WARNING, "encoding as 24 bits-per-sample\n");
502  avctx->bits_per_raw_sample = 24;
503  } else {
504  avctx->bits_per_raw_sample = 16;
505  s->extra_bits = 0;
506  }
507 
508  // Set default compression level
 // NOTE(review): the condition for this branch is missing
510  s->compression_level = 2;
511  else
512  s->compression_level = av_clip(avctx->compression_level, 0, 2);
513 
514  // Initialize default Rice parameters
515  s->rc.history_mult = 40;
516  s->rc.initial_history = 10;
517  s->rc.k_modifier = 14;
518  s->rc.rice_modifier = 4;
519 
 // NOTE(review): start of this statement is missing
521  avctx->channels,
522  avctx->bits_per_raw_sample);
523 
 // NOTE(review): the extradata allocation is missing
525  if (!avctx->extradata) {
526  ret = AVERROR(ENOMEM);
527  goto error;
528  }
530 
 // extradata fields are written big-endian at fixed byte offsets
531  alac_extradata = avctx->extradata;
532  AV_WB32(alac_extradata, ALAC_EXTRADATA_SIZE);
533  AV_WB32(alac_extradata+4, MKBETAG('a','l','a','c'));
534  AV_WB32(alac_extradata+12, avctx->frame_size);
535  AV_WB8 (alac_extradata+17, avctx->bits_per_raw_sample);
536  AV_WB8 (alac_extradata+21, avctx->channels);
537  AV_WB32(alac_extradata+24, s->max_coded_frame_size);
538  AV_WB32(alac_extradata+28,
539  avctx->sample_rate * avctx->channels * avctx->bits_per_raw_sample); // average bitrate
540  AV_WB32(alac_extradata+32, avctx->sample_rate);
541 
542  // Set relevant extradata fields
 // the Rice parameters are only stored when compression is enabled
543  if (s->compression_level > 0) {
544  AV_WB8(alac_extradata+18, s->rc.history_mult);
545  AV_WB8(alac_extradata+19, s->rc.initial_history);
546  AV_WB8(alac_extradata+20, s->rc.k_modifier);
547  }
548 
 // a negative user value means "not set"
550  if (avctx->min_prediction_order >= 0) {
551  if (avctx->min_prediction_order < MIN_LPC_ORDER ||
553  av_log(avctx, AV_LOG_ERROR, "invalid min prediction order: %d\n",
554  avctx->min_prediction_order);
555  ret = AVERROR(EINVAL);
556  goto error;
557  }
558 
560  }
561 
563  if (avctx->max_prediction_order >= 0) {
564  if (avctx->max_prediction_order < MIN_LPC_ORDER ||
566  av_log(avctx, AV_LOG_ERROR, "invalid max prediction order: %d\n",
567  avctx->max_prediction_order);
568  ret = AVERROR(EINVAL);
569  goto error;
570  }
571 
573  }
574 
 // NOTE(review): the condition and the log arguments of this check are missing
576  av_log(avctx, AV_LOG_ERROR,
577  "invalid prediction orders: min=%d max=%d\n",
579  ret = AVERROR(EINVAL);
580  goto error;
581  }
582 
583  avctx->coded_frame = avcodec_alloc_frame();
584  if (!avctx->coded_frame) {
585  ret = AVERROR(ENOMEM);
586  goto error;
587  }
588 
589  s->avctx = avctx;
590 
 // NOTE(review): one argument line of ff_lpc_init() is missing
591  if ((ret = ff_lpc_init(&s->lpc_ctx, avctx->frame_size,
593  FF_LPC_TYPE_LEVINSON)) < 0) {
594  goto error;
595  }
596 
597  return 0;
598 error:
599  alac_encode_close(avctx);
600  return ret;
601 }
602 
603 static int alac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
604  const AVFrame *frame, int *got_packet_ptr)
605 {
606  AlacEncodeContext *s = avctx->priv_data;
607  int out_bytes, max_frame_size, ret;
608 
609  s->frame_size = frame->nb_samples;
610 
611  if (frame->nb_samples < DEFAULT_FRAME_SIZE)
612  max_frame_size = get_max_frame_size(s->frame_size, avctx->channels,
613  avctx->bits_per_raw_sample);
614  else
615  max_frame_size = s->max_coded_frame_size;
616 
617  if ((ret = ff_alloc_packet2(avctx, avpkt, 2 * max_frame_size)))
618  return ret;
619 
620  /* use verbatim mode for compression_level 0 */
621  if (s->compression_level) {
622  s->verbatim = 0;
623  s->extra_bits = avctx->bits_per_raw_sample - 16;
624  } else {
625  s->verbatim = 1;
626  s->extra_bits = 0;
627  }
628 
629  out_bytes = write_frame(s, avpkt, frame->extended_data);
630 
631  if (out_bytes > max_frame_size) {
632  /* frame too large. use verbatim mode */
633  s->verbatim = 1;
634  s->extra_bits = 0;
635  out_bytes = write_frame(s, avpkt, frame->extended_data);
636  }
637 
638  avpkt->size = out_bytes;
639  *got_packet_ptr = 1;
640  return 0;
641 }
642 
/*
 * Codec descriptor for the ALAC encoder.
 *
 * NOTE(review): this listing is missing the declarator line that opens the
 * initialiser, the entries between .priv_data_size and .encode2 and between
 * .encode2 and .capabilities, and the remainder of the .sample_fmts list
 * (its closing brace included). Restore them from the upstream file before
 * building.
 */
644  .name = "alac",
645  .type = AVMEDIA_TYPE_AUDIO,
646  .id = AV_CODEC_ID_ALAC,
647  .priv_data_size = sizeof(AlacEncodeContext),
649  .encode2 = alac_encode_frame,
 // the final frame of a stream may be shorter than the regular frame size
651  .capabilities = CODEC_CAP_SMALL_LAST_FRAME,
652  .channel_layouts = ff_alac_channel_layouts,
653  .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S32P,
656  .long_name = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),
657 };