FFmpeg
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
alacenc.c
Go to the documentation of this file.
1 /*
2  * ALAC audio encoder
3  * Copyright (c) 2008 Jaikrishnan Menon <realityman@gmx.net>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include "avcodec.h"
23 #include "put_bits.h"
24 #include "internal.h"
25 #include "lpc.h"
26 #include "mathops.h"
27 #include "alac_data.h"
28 
29 #define DEFAULT_FRAME_SIZE 4096
30 #define ALAC_EXTRADATA_SIZE 36
31 #define ALAC_FRAME_HEADER_SIZE 55
32 #define ALAC_FRAME_FOOTER_SIZE 3
33 
34 #define ALAC_ESCAPE_CODE 0x1FF
35 #define ALAC_MAX_LPC_ORDER 30
36 #define DEFAULT_MAX_PRED_ORDER 6
37 #define DEFAULT_MIN_PRED_ORDER 4
38 #define ALAC_MAX_LPC_PRECISION 9
39 #define ALAC_MAX_LPC_SHIFT 9
40 
41 #define ALAC_CHMODE_LEFT_RIGHT 0
42 #define ALAC_CHMODE_LEFT_SIDE 1
43 #define ALAC_CHMODE_RIGHT_SIDE 2
44 #define ALAC_CHMODE_MID_SIDE 3
45 
46 typedef struct RiceContext {
51 } RiceContext;
52 
53 typedef struct AlacLPCContext {
54  int lpc_order;
56  int lpc_quant;
58 
59 typedef struct AlacEncodeContext {
60  int frame_size; /**< current frame size */
61  int verbatim; /**< current frame verbatim mode flag */
78 
79 
/*
 * Copy the caller's per-channel input planes into s->sample_buf[ch] as
 * int32_t, right-shifting every sample by `shift` on the way.
 *
 * s        - encoder context; s->frame_size samples are copied per channel
 * channels - number of planes in `samples` to copy
 * samples  - input planes, reinterpreted as the element type given to
 *            COPY_SAMPLES
 */
80 static void init_sample_buffers(AlacEncodeContext *s, int channels,
81  uint8_t const *samples[2])
82 {
83  int ch, i;
/* NOTE(review): `shift` is used by COPY_SAMPLES below, but its declaration is
 * not visible in this listing (embedded line numbers jump from 83 to 86) --
 * confirm its type and value against the real file. */
86 
87 #define COPY_SAMPLES(type) do { \
88  for (ch = 0; ch < channels; ch++) { \
89  int32_t *bptr = s->sample_buf[ch]; \
90  const type *sptr = (const type *)samples[ch]; \
91  for (i = 0; i < s->frame_size; i++) \
92  bptr[i] = sptr[i] >> shift; \
93  } \
94  } while (0)
95 
/* NOTE(review): the `if` branch paired with this `else` is missing from the
 * listing (numbers jump from 95 to 98); presumably it selects a wider sample
 * type for 32-bit input -- verify. Only the int16_t fallback is visible. */
98  else
99  COPY_SAMPLES(int16_t);
100 }
101 
102 static void encode_scalar(AlacEncodeContext *s, int x,
103  int k, int write_sample_size)
104 {
105  int divisor, q, r;
106 
107  k = FFMIN(k, s->rc.k_modifier);
108  divisor = (1<<k) - 1;
109  q = x / divisor;
110  r = x % divisor;
111 
112  if (q > 8) {
113  // write escape code and sample value directly
115  put_bits(&s->pbctx, write_sample_size, x);
116  } else {
117  if (q)
118  put_bits(&s->pbctx, q, (1<<q) - 1);
119  put_bits(&s->pbctx, 1, 0);
120 
121  if (k != 1) {
122  if (r > 0)
123  put_bits(&s->pbctx, k, r+1);
124  else
125  put_bits(&s->pbctx, k-1, 0);
126  }
127  }
128 }
129 
131  enum AlacRawDataBlockType element,
132  int instance)
133 {
134  int encode_fs = 0;
135 
137  encode_fs = 1;
138 
139  put_bits(&s->pbctx, 3, element); // element type
140  put_bits(&s->pbctx, 4, instance); // element instance
141  put_bits(&s->pbctx, 12, 0); // unused header bits
142  put_bits(&s->pbctx, 1, encode_fs); // Sample count is in the header
143  put_bits(&s->pbctx, 2, s->extra_bits >> 3); // Extra bytes (for 24-bit)
144  put_bits(&s->pbctx, 1, s->verbatim); // Audio block is verbatim
145  if (encode_fs)
146  put_bits32(&s->pbctx, s->frame_size); // No. of samples in the frame
147 }
148 
150 {
152  int shift[MAX_LPC_ORDER];
153  int opt_order;
154 
155  if (s->compression_level == 1) {
156  s->lpc[ch].lpc_order = 6;
157  s->lpc[ch].lpc_quant = 6;
158  s->lpc[ch].lpc_coeff[0] = 160;
159  s->lpc[ch].lpc_coeff[1] = -190;
160  s->lpc[ch].lpc_coeff[2] = 170;
161  s->lpc[ch].lpc_coeff[3] = -130;
162  s->lpc[ch].lpc_coeff[4] = 80;
163  s->lpc[ch].lpc_coeff[5] = -25;
164  } else {
165  opt_order = ff_lpc_calc_coefs(&s->lpc_ctx, s->sample_buf[ch],
166  s->frame_size,
169  ALAC_MAX_LPC_PRECISION, coefs, shift,
172 
173  s->lpc[ch].lpc_order = opt_order;
174  s->lpc[ch].lpc_quant = shift[opt_order-1];
175  memcpy(s->lpc[ch].lpc_coeff, coefs[opt_order-1], opt_order*sizeof(int));
176  }
177 }
178 
/**
 * Choose the stereo decorrelation mode with the lowest estimated cost.
 *
 * For every sample from index 2 onwards the 2nd-order residual of each input
 * channel is formed, and the absolute values of the left, right, mid
 * ((l + r) >> 1) and side (l - r) residuals are accumulated. Each mode is
 * scored as the sum of the two signals it would store.
 *
 * @param left_ch   left channel samples (read only)
 * @param right_ch  right channel samples (read only)
 * @param n         number of samples per channel
 * @return index of the cheapest mode: 0 = left+right, 1 = left+side,
 *         2 = right+side, 3 = mid+side; ties go to the lowest index
 */
static int estimate_stereo_mode(int32_t *left_ch, int32_t *right_ch, int n)
{
    uint64_t abs_left = 0, abs_right = 0, abs_mid = 0, abs_side = 0;
    uint64_t cost[4];
    int pos, mode, winner = 0;

    /* accumulate the magnitude of the 2nd-order residual of each signal */
    for (pos = 2; pos < n; pos++) {
        const int32_t l2   = left_ch[pos]  - 2 * left_ch[pos - 1]  + left_ch[pos - 2];
        const int32_t r2   = right_ch[pos] - 2 * right_ch[pos - 1] + right_ch[pos - 2];
        const int32_t mid  = (l2 + r2) >> 1;
        const int32_t side = l2 - r2;

        abs_left  += l2   >= 0 ? l2   : -l2;
        abs_right += r2   >= 0 ? r2   : -r2;
        abs_mid   += mid  >= 0 ? mid  : -mid;
        abs_side  += side >= 0 ? side : -side;
    }

    /* cost of each mode = cost of the two signals it transmits */
    cost[0] = abs_left  + abs_right;
    cost[1] = abs_left  + abs_side;
    cost[2] = abs_right + abs_side;
    cost[3] = abs_mid   + abs_side;

    /* strict comparison keeps the earliest mode on ties */
    for (mode = 1; mode < 4; mode++)
        if (cost[mode] < cost[winner])
            winner = mode;

    return winner;
}
211 
213 {
214  int32_t *left = s->sample_buf[0], *right = s->sample_buf[1];
215  int i, mode, n = s->frame_size;
216  int32_t tmp;
217 
218  mode = estimate_stereo_mode(left, right, n);
219 
220  switch (mode) {
222  s->interlacing_leftweight = 0;
223  s->interlacing_shift = 0;
224  break;
226  for (i = 0; i < n; i++)
227  right[i] = left[i] - right[i];
228  s->interlacing_leftweight = 1;
229  s->interlacing_shift = 0;
230  break;
232  for (i = 0; i < n; i++) {
233  tmp = right[i];
234  right[i] = left[i] - right[i];
235  left[i] = tmp + (right[i] >> 31);
236  }
237  s->interlacing_leftweight = 1;
238  s->interlacing_shift = 31;
239  break;
240  default:
241  for (i = 0; i < n; i++) {
242  tmp = left[i];
243  left[i] = (tmp + right[i]) >> 1;
244  right[i] = tmp - right[i];
245  }
246  s->interlacing_leftweight = 1;
247  s->interlacing_shift = 1;
248  break;
249  }
250 }
251 
253 {
254  int i;
255  AlacLPCContext lpc = s->lpc[ch];
256 
257  if (lpc.lpc_order == 31) {
258  s->predictor_buf[0] = s->sample_buf[ch][0];
259 
260  for (i = 1; i < s->frame_size; i++) {
261  s->predictor_buf[i] = s->sample_buf[ch][i ] -
262  s->sample_buf[ch][i - 1];
263  }
264 
265  return;
266  }
267 
268  // generalised linear predictor
269 
270  if (lpc.lpc_order > 0) {
271  int32_t *samples = s->sample_buf[ch];
272  int32_t *residual = s->predictor_buf;
273 
274  // generate warm-up samples
275  residual[0] = samples[0];
276  for (i = 1; i <= lpc.lpc_order; i++)
277  residual[i] = samples[i] - samples[i-1];
278 
279  // perform lpc on remaining samples
280  for (i = lpc.lpc_order + 1; i < s->frame_size; i++) {
281  int sum = 1 << (lpc.lpc_quant - 1), res_val, j;
282 
283  for (j = 0; j < lpc.lpc_order; j++) {
284  sum += (samples[lpc.lpc_order-j] - samples[0]) *
285  lpc.lpc_coeff[j];
286  }
287 
288  sum >>= lpc.lpc_quant;
289  sum += samples[0];
290  residual[i] = sign_extend(samples[lpc.lpc_order+1] - sum,
291  s->write_sample_size);
292  res_val = residual[i];
293 
294  if (res_val) {
295  int index = lpc.lpc_order - 1;
296  int neg = (res_val < 0);
297 
298  while (index >= 0 && (neg ? (res_val < 0) : (res_val > 0))) {
299  int val = samples[0] - samples[lpc.lpc_order - index];
300  int sign = (val ? FFSIGN(val) : 0);
301 
302  if (neg)
303  sign *= -1;
304 
305  lpc.lpc_coeff[index] -= sign;
306  val *= sign;
307  res_val -= (val >> lpc.lpc_quant) * (lpc.lpc_order - index);
308  index--;
309  }
310  }
311  samples++;
312  }
313  }
314 }
315 
317 {
318  unsigned int history = s->rc.initial_history;
319  int sign_modifier = 0, i, k;
321 
322  for (i = 0; i < s->frame_size;) {
323  int x;
324 
325  k = av_log2((history >> 9) + 3);
326 
327  x = -2 * (*samples) -1;
328  x ^= x >> 31;
329 
330  samples++;
331  i++;
332 
333  encode_scalar(s, x - sign_modifier, k, s->write_sample_size);
334 
335  history += x * s->rc.history_mult -
336  ((history * s->rc.history_mult) >> 9);
337 
338  sign_modifier = 0;
339  if (x > 0xFFFF)
340  history = 0xFFFF;
341 
342  if (history < 128 && i < s->frame_size) {
343  unsigned int block_size = 0;
344 
345  k = 7 - av_log2(history) + ((history + 16) >> 6);
346 
347  while (*samples == 0 && i < s->frame_size) {
348  samples++;
349  i++;
350  block_size++;
351  }
352  encode_scalar(s, block_size, k, 16);
353  sign_modifier = (block_size <= 0xFFFF);
354  history = 0;
355  }
356 
357  }
358 }
359 
361  enum AlacRawDataBlockType element, int instance,
362  const uint8_t *samples0, const uint8_t *samples1)
363 {
364  uint8_t const *samples[2] = { samples0, samples1 };
365  int i, j, channels;
366  int prediction_type = 0;
367  PutBitContext *pb = &s->pbctx;
368 
369  channels = element == TYPE_CPE ? 2 : 1;
370 
371  if (s->verbatim) {
372  write_element_header(s, element, instance);
373  /* samples are channel-interleaved in verbatim mode */
374  if (s->avctx->sample_fmt == AV_SAMPLE_FMT_S32P) {
375  int shift = 32 - s->avctx->bits_per_raw_sample;
376  int32_t const *samples_s32[2] = { (const int32_t *)samples0,
377  (const int32_t *)samples1 };
378  for (i = 0; i < s->frame_size; i++)
379  for (j = 0; j < channels; j++)
381  samples_s32[j][i] >> shift);
382  } else {
383  int16_t const *samples_s16[2] = { (const int16_t *)samples0,
384  (const int16_t *)samples1 };
385  for (i = 0; i < s->frame_size; i++)
386  for (j = 0; j < channels; j++)
388  samples_s16[j][i]);
389  }
390  } else {
392  channels - 1;
393 
394  init_sample_buffers(s, channels, samples);
395  write_element_header(s, element, instance);
396 
397  if (channels == 2)
399  else
401  put_bits(pb, 8, s->interlacing_shift);
402  put_bits(pb, 8, s->interlacing_leftweight);
403 
404  for (i = 0; i < channels; i++) {
405  calc_predictor_params(s, i);
406 
407  put_bits(pb, 4, prediction_type);
408  put_bits(pb, 4, s->lpc[i].lpc_quant);
409 
410  put_bits(pb, 3, s->rc.rice_modifier);
411  put_bits(pb, 5, s->lpc[i].lpc_order);
412  // predictor coeff. table
413  for (j = 0; j < s->lpc[i].lpc_order; j++)
414  put_sbits(pb, 16, s->lpc[i].lpc_coeff[j]);
415  }
416 
417  // write extra bits if needed
418  if (s->extra_bits) {
419  uint32_t mask = (1 << s->extra_bits) - 1;
420  for (i = 0; i < s->frame_size; i++) {
421  for (j = 0; j < channels; j++) {
422  put_bits(pb, s->extra_bits, s->sample_buf[j][i] & mask);
423  s->sample_buf[j][i] >>= s->extra_bits;
424  }
425  }
426  }
427 
428  // apply lpc and entropy coding to audio samples
429  for (i = 0; i < channels; i++) {
430  alac_linear_predictor(s, i);
431 
432  // TODO: determine when this will actually help. for now it's not used.
433  if (prediction_type == 15) {
434  // 2nd pass 1st order filter
435  for (j = s->frame_size - 1; j > 0; j--)
436  s->predictor_buf[j] -= s->predictor_buf[j - 1];
437  }
439  }
440  }
441 }
442 
443 static int write_frame(AlacEncodeContext *s, AVPacket *avpkt,
444  uint8_t * const *samples)
445 {
446  PutBitContext *pb = &s->pbctx;
447  const enum AlacRawDataBlockType *ch_elements = ff_alac_channel_elements[s->avctx->channels - 1];
448  const uint8_t *ch_map = ff_alac_channel_layout_offsets[s->avctx->channels - 1];
449  int ch, element, sce, cpe;
450 
451  init_put_bits(pb, avpkt->data, avpkt->size);
452 
453  ch = element = sce = cpe = 0;
454  while (ch < s->avctx->channels) {
455  if (ch_elements[element] == TYPE_CPE) {
456  write_element(s, TYPE_CPE, cpe, samples[ch_map[ch]],
457  samples[ch_map[ch + 1]]);
458  cpe++;
459  ch += 2;
460  } else {
461  write_element(s, TYPE_SCE, sce, samples[ch_map[ch]], NULL);
462  sce++;
463  ch++;
464  }
465  element++;
466  }
467 
468  put_bits(pb, 3, TYPE_END);
469  flush_put_bits(pb);
470 
471  return put_bits_count(pb) >> 3;
472 }
473 
474 static av_always_inline int get_max_frame_size(int frame_size, int ch, int bps)
475 {
476  int header_bits = 23 + 32 * (frame_size < DEFAULT_FRAME_SIZE);
477  return FFALIGN(header_bits + bps * ch * frame_size + 3, 8) / 8;
478 }
479 
481 {
482  AlacEncodeContext *s = avctx->priv_data;
483  ff_lpc_end(&s->lpc_ctx);
484  av_freep(&avctx->extradata);
485  avctx->extradata_size = 0;
486  av_freep(&avctx->coded_frame);
487  return 0;
488 }
489 
491 {
492  AlacEncodeContext *s = avctx->priv_data;
493  int ret;
494  uint8_t *alac_extradata;
495 
497 
498  if (avctx->sample_fmt == AV_SAMPLE_FMT_S32P) {
499  if (avctx->bits_per_raw_sample != 24)
500  av_log(avctx, AV_LOG_WARNING, "encoding as 24 bits-per-sample\n");
501  avctx->bits_per_raw_sample = 24;
502  } else {
503  avctx->bits_per_raw_sample = 16;
504  s->extra_bits = 0;
505  }
506 
507  // Set default compression level
509  s->compression_level = 2;
510  else
511  s->compression_level = av_clip(avctx->compression_level, 0, 2);
512 
513  // Initialize default Rice parameters
514  s->rc.history_mult = 40;
515  s->rc.initial_history = 10;
516  s->rc.k_modifier = 14;
517  s->rc.rice_modifier = 4;
518 
520  avctx->channels,
521  avctx->bits_per_raw_sample);
522 
524  if (!avctx->extradata) {
525  ret = AVERROR(ENOMEM);
526  goto error;
527  }
529 
530  alac_extradata = avctx->extradata;
531  AV_WB32(alac_extradata, ALAC_EXTRADATA_SIZE);
532  AV_WB32(alac_extradata+4, MKBETAG('a','l','a','c'));
533  AV_WB32(alac_extradata+12, avctx->frame_size);
534  AV_WB8 (alac_extradata+17, avctx->bits_per_raw_sample);
535  AV_WB8 (alac_extradata+21, avctx->channels);
536  AV_WB32(alac_extradata+24, s->max_coded_frame_size);
537  AV_WB32(alac_extradata+28,
538  avctx->sample_rate * avctx->channels * avctx->bits_per_raw_sample); // average bitrate
539  AV_WB32(alac_extradata+32, avctx->sample_rate);
540 
541  // Set relevant extradata fields
542  if (s->compression_level > 0) {
543  AV_WB8(alac_extradata+18, s->rc.history_mult);
544  AV_WB8(alac_extradata+19, s->rc.initial_history);
545  AV_WB8(alac_extradata+20, s->rc.k_modifier);
546  }
547 
549  if (avctx->min_prediction_order >= 0) {
550  if (avctx->min_prediction_order < MIN_LPC_ORDER ||
552  av_log(avctx, AV_LOG_ERROR, "invalid min prediction order: %d\n",
553  avctx->min_prediction_order);
554  ret = AVERROR(EINVAL);
555  goto error;
556  }
557 
559  }
560 
562  if (avctx->max_prediction_order >= 0) {
563  if (avctx->max_prediction_order < MIN_LPC_ORDER ||
565  av_log(avctx, AV_LOG_ERROR, "invalid max prediction order: %d\n",
566  avctx->max_prediction_order);
567  ret = AVERROR(EINVAL);
568  goto error;
569  }
570 
572  }
573 
575  av_log(avctx, AV_LOG_ERROR,
576  "invalid prediction orders: min=%d max=%d\n",
578  ret = AVERROR(EINVAL);
579  goto error;
580  }
581 
582  avctx->coded_frame = avcodec_alloc_frame();
583  if (!avctx->coded_frame) {
584  ret = AVERROR(ENOMEM);
585  goto error;
586  }
587 
588  s->avctx = avctx;
589 
590  if ((ret = ff_lpc_init(&s->lpc_ctx, avctx->frame_size,
592  FF_LPC_TYPE_LEVINSON)) < 0) {
593  goto error;
594  }
595 
596  return 0;
597 error:
598  alac_encode_close(avctx);
599  return ret;
600 }
601 
602 static int alac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
603  const AVFrame *frame, int *got_packet_ptr)
604 {
605  AlacEncodeContext *s = avctx->priv_data;
606  int out_bytes, max_frame_size, ret;
607 
608  s->frame_size = frame->nb_samples;
609 
610  if (frame->nb_samples < DEFAULT_FRAME_SIZE)
611  max_frame_size = get_max_frame_size(s->frame_size, avctx->channels,
612  avctx->bits_per_raw_sample);
613  else
614  max_frame_size = s->max_coded_frame_size;
615 
616  if ((ret = ff_alloc_packet2(avctx, avpkt, 2 * max_frame_size)) < 0)
617  return ret;
618 
619  /* use verbatim mode for compression_level 0 */
620  if (s->compression_level) {
621  s->verbatim = 0;
622  s->extra_bits = avctx->bits_per_raw_sample - 16;
623  } else {
624  s->verbatim = 1;
625  s->extra_bits = 0;
626  }
627 
628  out_bytes = write_frame(s, avpkt, frame->extended_data);
629 
630  if (out_bytes > max_frame_size) {
631  /* frame too large. use verbatim mode */
632  s->verbatim = 1;
633  s->extra_bits = 0;
634  out_bytes = write_frame(s, avpkt, frame->extended_data);
635  }
636 
637  avpkt->size = out_bytes;
638  *got_packet_ptr = 1;
639  return 0;
640 }
641 
643  .name = "alac",
644  .type = AVMEDIA_TYPE_AUDIO,
645  .id = AV_CODEC_ID_ALAC,
646  .priv_data_size = sizeof(AlacEncodeContext),
648  .encode2 = alac_encode_frame,
650  .capabilities = CODEC_CAP_SMALL_LAST_FRAME,
651  .channel_layouts = ff_alac_channel_layouts,
652  .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S32P,
655  .long_name = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),
656 };