FFmpeg: af_loudnorm.c
1 /*
2  * Copyright (c) 2016 Kyle Swanson <k@ylo.ph>.
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /* http://k.ylo.ph/2016/04/04/loudnorm.html */
22 
23 #include "libavutil/opt.h"
24 #include "avfilter.h"
25 #include "internal.h"
26 #include "audio.h"
27 #include <ebur128.h>
28 
29 enum FrameType {
30  FIRST_FRAME,
31  INNER_FRAME,
32  FINAL_FRAME,
33  LINEAR_MODE,
34  FRAME_NB
35 };
36 
37 enum LimiterState {
38  OUT,
39  ATTACK,
40  SUSTAIN,
41  RELEASE,
42  STATE_NB
43 };
44 
45 enum PrintFormat {
46  NONE,
47  JSON,
48  SUMMARY,
49  PF_NB
50 };
51 
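/*
 * FrameType tracks where the filter is in the stream: the initial 3 s analysis
 * frame, steady-state inner frames, the final flush frame at EOF, or the pure
 * constant-gain (linear) path. LimiterState is the stage of the true-peak
 * limiter's gain envelope, and PrintFormat selects the statistics report
 * emitted from uninit().
 */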
52 typedef struct LoudNormContext {
53  const AVClass *class;
54  double target_i;
55  double target_lra;
56  double target_tp;
57  double measured_i;
58  double measured_lra;
59  double measured_tp;
60  double measured_thresh;
61  double offset;
62  int linear;
63  int dual_mono;
64  enum PrintFormat print_format;
65
66  double *buf;
67  int buf_size;
68  int buf_index;
69  int prev_buf_index;
70
71  double delta[30];
72  double weights[21];
73  double prev_delta;
74  int index;
75 
76  double gain_reduction[2];
77  double *limiter_buf;
78  double *prev_smp;
79  int limiter_buf_index;
80  int limiter_buf_size;
81  enum LimiterState limiter_state;
82  int peak_index;
83  int env_index;
84  int env_cnt;
85  int attack_length;
86  int release_length;
87
88  int64_t pts;
89  enum FrameType frame_type;
90  int above_threshold;
91  int prev_nb_samples;
92  int channels;
93 
94  ebur128_state *r128_in;
95  ebur128_state *r128_out;
96 } LoudNormContext;
97
98 #define OFFSET(x) offsetof(LoudNormContext, x)
99 #define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
100 
101 static const AVOption loudnorm_options[] = {
102  { "I", "set integrated loudness target", OFFSET(target_i), AV_OPT_TYPE_DOUBLE, {.dbl = -24.}, -70., -5., FLAGS },
103  { "i", "set integrated loudness target", OFFSET(target_i), AV_OPT_TYPE_DOUBLE, {.dbl = -24.}, -70., -5., FLAGS },
104  { "LRA", "set loudness range target", OFFSET(target_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 7.}, 1., 20., FLAGS },
105  { "lra", "set loudness range target", OFFSET(target_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 7.}, 1., 20., FLAGS },
106  { "TP", "set maximum true peak", OFFSET(target_tp), AV_OPT_TYPE_DOUBLE, {.dbl = -2.}, -9., 0., FLAGS },
107  { "tp", "set maximum true peak", OFFSET(target_tp), AV_OPT_TYPE_DOUBLE, {.dbl = -2.}, -9., 0., FLAGS },
108  { "measured_I", "measured IL of input file", OFFSET(measured_i), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 0., FLAGS },
109  { "measured_i", "measured IL of input file", OFFSET(measured_i), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 0., FLAGS },
110  { "measured_LRA", "measured LRA of input file", OFFSET(measured_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, 0., 99., FLAGS },
111  { "measured_lra", "measured LRA of input file", OFFSET(measured_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, 0., 99., FLAGS },
112  { "measured_TP", "measured true peak of input file", OFFSET(measured_tp), AV_OPT_TYPE_DOUBLE, {.dbl = 99.}, -99., 99., FLAGS },
113  { "measured_tp", "measured true peak of input file", OFFSET(measured_tp), AV_OPT_TYPE_DOUBLE, {.dbl = 99.}, -99., 99., FLAGS },
114  { "measured_thresh", "measured threshold of input file", OFFSET(measured_thresh), AV_OPT_TYPE_DOUBLE, {.dbl = -70.}, -99., 0., FLAGS },
115  { "offset", "set offset gain", OFFSET(offset), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 99., FLAGS },
116  { "linear", "normalize linearly if possible", OFFSET(linear), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS },
117  { "dual_mono", "treat mono input as dual-mono", OFFSET(dual_mono), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS },
118  { "print_format", "set print format for stats", OFFSET(print_format), AV_OPT_TYPE_INT, {.i64 = NONE}, NONE, PF_NB -1, FLAGS, "print_format" },
119  { "none", 0, 0, AV_OPT_TYPE_CONST, {.i64 = NONE}, 0, 0, FLAGS, "print_format" },
120  { "json", 0, 0, AV_OPT_TYPE_CONST, {.i64 = JSON}, 0, 0, FLAGS, "print_format" },
121  { "summary", 0, 0, AV_OPT_TYPE_CONST, {.i64 = SUMMARY}, 0, 0, FLAGS, "print_format" },
122  { NULL }
123 };
124 
125 AVFILTER_DEFINE_CLASS(loudnorm);
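/*
 * The options above map directly onto the loudnorm filter string on the ffmpeg
 * command line. A common pattern is two passes: measure first (discarding the
 * audio via -f null), then feed the reported statistics back through the
 * measured_* options so the filter can choose linear normalization. The
 * measured_* numbers below are illustrative placeholders, not real values:
 *
 *   ffmpeg -i in.wav -af loudnorm=I=-16:TP=-1.5:LRA=11:print_format=json -f null -
 *
 *   ffmpeg -i in.wav \
 *     -af loudnorm=I=-16:TP=-1.5:LRA=11:measured_I=-27.6:measured_TP=-4.2:measured_LRA=5.9:measured_thresh=-37.9:linear=true:print_format=summary \
 *     -ar 48000 out.wav
 *
 * The -ar 48000 is optional; without it the output keeps the 192 kHz rate the
 * filter itself runs at (see query_formats() below).
 */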
126 
127 static inline int frame_size(int sample_rate, int frame_len_msec)
128 {
129  const int frame_size = round((double)sample_rate * (frame_len_msec / 1000.0));
130  return frame_size + (frame_size % 2);
131 }
132 
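/* Builds the 21-tap Gaussian window (sigma = 3.5) used to smooth the per-frame
 * gain factors, normalized so the weights sum to 1. */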
133 static void init_gaussian_filter(LoudNormContext *s)
134 {
135  double total_weight = 0.0;
136  const double sigma = 3.5;
137  double adjust;
138  int i;
139 
140  const int offset = 21 / 2;
141  const double c1 = 1.0 / (sigma * sqrt(2.0 * M_PI));
142  const double c2 = 2.0 * pow(sigma, 2.0);
143 
144  for (i = 0; i < 21; i++) {
145  const int x = i - offset;
146  s->weights[i] = c1 * exp(-(pow(x, 2.0) / c2));
147  total_weight += s->weights[i];
148  }
149 
150  adjust = 1.0 / total_weight;
151  for (i = 0; i < 21; i++)
152  s->weights[i] *= adjust;
153 }
154 
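/* Returns the Gaussian-weighted average of 21 consecutive entries of the
 * delta[] ring buffer centered on index, i.e. the smoothed gain factor. */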
155 static double gaussian_filter(LoudNormContext *s, int index)
156 {
157  double result = 0.;
158  int i;
159 
160  index = index - 10 > 0 ? index - 10 : index + 20;
161  for (i = 0; i < 21; i++)
162  result += s->delta[((index + i) < 30) ? (index + i) : (index + i - 30)] * s->weights[i];
163 
164  return result;
165 }
166 
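/* Scans the limiter's look-ahead buffer, starting 1920 samples (10 ms at the
 * filter's fixed 192 kHz rate) past offset, for a local maximum above the
 * true-peak ceiling. On success *peak_delta is the sample offset of the peak,
 * *peak_value its magnitude and s->peak_index its buffer position; otherwise
 * *peak_delta stays -1. */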
167 static void detect_peak(LoudNormContext *s, int offset, int nb_samples, int channels, int *peak_delta, double *peak_value)
168 {
169  int n, c, i, index;
170  double ceiling;
171  double *buf;
172 
173  *peak_delta = -1;
174  buf = s->limiter_buf;
175  ceiling = s->target_tp;
176 
177  index = s->limiter_buf_index + (offset * channels) + (1920 * channels);
178  if (index >= s->limiter_buf_size)
179  index -= s->limiter_buf_size;
180 
181  if (s->frame_type == FIRST_FRAME) {
182  for (c = 0; c < channels; c++)
183  s->prev_smp[c] = fabs(buf[index + c - channels]);
184  }
185 
186  for (n = 0; n < nb_samples; n++) {
187  for (c = 0; c < channels; c++) {
188  double this, next, max_peak;
189 
190  this = fabs(buf[(index + c) < s->limiter_buf_size ? (index + c) : (index + c - s->limiter_buf_size)]);
191  next = fabs(buf[(index + c + channels) < s->limiter_buf_size ? (index + c + channels) : (index + c + channels - s->limiter_buf_size)]);
192 
193  if ((s->prev_smp[c] <= this) && (next <= this) && (this > ceiling) && (n > 0)) {
194  int detected;
195 
196  detected = 1;
197  for (i = 2; i < 12; i++) {
198  next = fabs(buf[(index + c + (i * channels)) < s->limiter_buf_size ? (index + c + (i * channels)) : (index + c + (i * channels) - s->limiter_buf_size)]);
199  if (next > this) {
200  detected = 0;
201  break;
202  }
203  }
204 
205  if (!detected)
206  continue;
207 
208  for (c = 0; c < channels; c++) {
209  if (c == 0 || fabs(buf[index + c]) > max_peak)
210  max_peak = fabs(buf[index + c]);
211 
212  s->prev_smp[c] = fabs(buf[(index + c) < s->limiter_buf_size ? (index + c) : (index + c - s->limiter_buf_size)]);
213  }
214 
215  *peak_delta = n;
216  s->peak_index = index;
217  *peak_value = max_peak;
218  return;
219  }
220 
221  s->prev_smp[c] = this;
222  }
223 
224  index += channels;
225  if (index >= s->limiter_buf_size)
226  index -= s->limiter_buf_size;
227  }
228 }
229 
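/* Applies the look-ahead true-peak limiter: a gain envelope driven by the
 * OUT/ATTACK/SUSTAIN/RELEASE state machine ramps the gain down before each
 * detected peak and back up afterwards, then the limited samples are copied
 * from the ring buffer to out, hard-clipping anything still above the ceiling. */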
230 static void true_peak_limiter(LoudNormContext *s, double *out, int nb_samples, int channels)
231 {
232  int n, c, index, peak_delta, smp_cnt;
233  double ceiling, peak_value;
234  double *buf;
235 
236  buf = s->limiter_buf;
237  ceiling = s->target_tp;
238  index = s->limiter_buf_index;
239  smp_cnt = 0;
240 
241  if (s->frame_type == FIRST_FRAME) {
242  double max;
243 
244  max = 0.;
245  for (n = 0; n < 1920; n++) {
246  for (c = 0; c < channels; c++) {
247  max = fabs(buf[c]) > max ? fabs(buf[c]) : max;
248  }
249  buf += channels;
250  }
251 
252  if (max > ceiling) {
253  s->gain_reduction[1] = ceiling / max;
254  s->limiter_state = SUSTAIN;
255  buf = s->limiter_buf;
256 
257  for (n = 0; n < 1920; n++) {
258  for (c = 0; c < channels; c++) {
259  double env;
260  env = s->gain_reduction[1];
261  buf[c] *= env;
262  }
263  buf += channels;
264  }
265  }
266 
267  buf = s->limiter_buf;
268  }
269 
270  do {
271 
272  switch(s->limiter_state) {
273  case OUT:
274  detect_peak(s, smp_cnt, nb_samples - smp_cnt, channels, &peak_delta, &peak_value);
275  if (peak_delta != -1) {
276  s->env_cnt = 0;
277  smp_cnt += (peak_delta - s->attack_length);
278  s->gain_reduction[0] = 1.;
279  s->gain_reduction[1] = ceiling / peak_value;
280  s->limiter_state = ATTACK;
281 
282  s->env_index = s->peak_index - (s->attack_length * channels);
283  if (s->env_index < 0)
284  s->env_index += s->limiter_buf_size;
285 
286  s->env_index += (s->env_cnt * channels);
287  if (s->env_index > s->limiter_buf_size)
288  s->env_index -= s->limiter_buf_size;
289 
290  } else {
291  smp_cnt = nb_samples;
292  }
293  break;
294 
295  case ATTACK:
296  for (; s->env_cnt < s->attack_length; s->env_cnt++) {
297  for (c = 0; c < channels; c++) {
298  double env;
299  env = s->gain_reduction[0] - ((double) s->env_cnt / (s->attack_length - 1) * (s->gain_reduction[0] - s->gain_reduction[1]));
300  buf[s->env_index + c] *= env;
301  }
302 
303  s->env_index += channels;
304  if (s->env_index >= s->limiter_buf_size)
305  s->env_index -= s->limiter_buf_size;
306 
307  smp_cnt++;
308  if (smp_cnt >= nb_samples) {
309  s->env_cnt++;
310  break;
311  }
312  }
313 
314  if (smp_cnt < nb_samples) {
315  s->env_cnt = 0;
316  s->attack_length = 1920;
317  s->limiter_state = SUSTAIN;
318  }
319  break;
320 
321  case SUSTAIN:
322  detect_peak(s, smp_cnt, nb_samples, channels, &peak_delta, &peak_value);
323  if (peak_delta == -1) {
324  s->limiter_state = RELEASE;
325  s->gain_reduction[0] = s->gain_reduction[1];
326  s->gain_reduction[1] = 1.;
327  s->env_cnt = 0;
328  break;
329  } else {
330  double gain_reduction;
331  gain_reduction = ceiling / peak_value;
332 
333  if (gain_reduction < s->gain_reduction[1]) {
334  s->limiter_state = ATTACK;
335 
336  s->attack_length = peak_delta;
337  if (s->attack_length <= 1)
338  s->attack_length = 2;
339 
340  s->gain_reduction[0] = s->gain_reduction[1];
341  s->gain_reduction[1] = gain_reduction;
342  s->env_cnt = 0;
343  break;
344  }
345 
346  for (s->env_cnt = 0; s->env_cnt < peak_delta; s->env_cnt++) {
347  for (c = 0; c < channels; c++) {
348  double env;
349  env = s->gain_reduction[1];
350  buf[s->env_index + c] *= env;
351  }
352 
353  s->env_index += channels;
354  if (s->env_index >= s->limiter_buf_size)
355  s->env_index -= s->limiter_buf_size;
356 
357  smp_cnt++;
358  if (smp_cnt >= nb_samples) {
359  s->env_cnt++;
360  break;
361  }
362  }
363  }
364  break;
365 
366  case RELEASE:
367  for (; s->env_cnt < s->release_length; s->env_cnt++) {
368  for (c = 0; c < channels; c++) {
369  double env;
370  env = s->gain_reduction[0] + (((double) s->env_cnt / (s->release_length - 1)) * (s->gain_reduction[1] - s->gain_reduction[0]));
371  buf[s->env_index + c] *= env;
372  }
373 
374  s->env_index += channels;
375  if (s->env_index >= s->limiter_buf_size)
376  s->env_index -= s->limiter_buf_size;
377 
378  smp_cnt++;
379  if (smp_cnt >= nb_samples) {
380  s->env_cnt++;
381  break;
382  }
383  }
384 
385  if (smp_cnt < nb_samples) {
386  s->env_cnt = 0;
387  s->limiter_state = OUT;
388  }
389 
390  break;
391  }
392 
393  } while (smp_cnt < nb_samples);
394 
395  for (n = 0; n < nb_samples; n++) {
396  for (c = 0; c < channels; c++) {
397  out[c] = buf[index + c];
398  if (fabs(out[c]) > ceiling) {
399  out[c] = ceiling * (out[c] < 0 ? -1 : 1);
400  }
401  }
402  out += channels;
403  index += channels;
404  if (index >= s->limiter_buf_size)
405  index -= s->limiter_buf_size;
406  }
407 }
408 
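/* Per-frame processing. Input is always measured with r128_in, then handled
 * according to frame_type: FIRST_FRAME fills the 3 s delay buffer, seeds the
 * delta[] gain history from the short-term loudness and emits the first 100 ms;
 * INNER_FRAME interpolates between the smoothed gain for this and the next
 * 100 ms block; FINAL_FRAME drains whatever is still buffered at EOF;
 * LINEAR_MODE just applies the constant offset gain. Output is measured with
 * r128_out for the final statistics. */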
409 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
410 {
411  AVFilterContext *ctx = inlink->dst;
412  LoudNormContext *s = ctx->priv;
413  AVFilterLink *outlink = ctx->outputs[0];
414  AVFrame *out;
415  const double *src;
416  double *dst;
417  double *buf;
418  double *limiter_buf;
419  int i, n, c, subframe_length, src_index;
420  double gain, gain_next, env_global, env_shortterm,
421  global, shortterm, lra, relative_threshold;
422 
423  if (av_frame_is_writable(in)) {
424  out = in;
425  } else {
426  out = ff_get_audio_buffer(inlink, in->nb_samples);
427  if (!out) {
428  av_frame_free(&in);
429  return AVERROR(ENOMEM);
430  }
431  av_frame_copy_props(out, in);
432  }
433 
434  out->pts = s->pts;
435  src = (const double *)in->data[0];
436  dst = (double *)out->data[0];
437  buf = s->buf;
438  limiter_buf = s->limiter_buf;
439 
440  ebur128_add_frames_double(s->r128_in, src, in->nb_samples);
441 
442  if (s->frame_type == FIRST_FRAME && in->nb_samples < frame_size(inlink->sample_rate, 3000)) {
443  double offset, offset_tp, true_peak;
444 
445  ebur128_loudness_global(s->r128_in, &global);
446  for (c = 0; c < inlink->channels; c++) {
447  double tmp;
448  ebur128_sample_peak(s->r128_in, c, &tmp);
449  if (c == 0 || tmp > true_peak)
450  true_peak = tmp;
451  }
452 
453  offset = s->target_i - global;
454  offset_tp = true_peak + offset;
455  s->offset = offset_tp < s->target_tp ? offset : s->target_tp - true_peak;
456  s->offset = pow(10., s->offset / 20.);
457  s->frame_type = LINEAR_MODE;
458  }
459 
460  switch (s->frame_type) {
461  case FIRST_FRAME:
462  for (n = 0; n < in->nb_samples; n++) {
463  for (c = 0; c < inlink->channels; c++) {
464  buf[s->buf_index + c] = src[c];
465  }
466  src += inlink->channels;
467  s->buf_index += inlink->channels;
468  }
469 
470  ebur128_loudness_shortterm(s->r128_in, &shortterm);
471 
472  if (shortterm < s->measured_thresh) {
473  s->above_threshold = 0;
474  env_shortterm = shortterm <= -70. ? 0. : s->target_i - s->measured_i;
475  } else {
476  s->above_threshold = 1;
477  env_shortterm = shortterm <= -70. ? 0. : s->target_i - shortterm;
478  }
479 
480  for (n = 0; n < 30; n++)
481  s->delta[n] = pow(10., env_shortterm / 20.);
482  s->prev_delta = s->delta[s->index];
483 
484  s->buf_index =
485  s->limiter_buf_index = 0;
486 
487  for (n = 0; n < (s->limiter_buf_size / inlink->channels); n++) {
488  for (c = 0; c < inlink->channels; c++) {
489  limiter_buf[s->limiter_buf_index + c] = buf[s->buf_index + c] * s->delta[s->index] * s->offset;
490  }
491  s->limiter_buf_index += inlink->channels;
492  if (s->limiter_buf_index >= s->limiter_buf_size)
493  s->limiter_buf_index -= s->limiter_buf_size;
494
495  s->buf_index += inlink->channels;
496  }
497 
498  subframe_length = frame_size(inlink->sample_rate, 100);
499  true_peak_limiter(s, dst, subframe_length, inlink->channels);
500  ebur128_add_frames_double(s->r128_out, dst, subframe_length);
501 
502  s->pts +=
503  out->nb_samples =
504  inlink->min_samples =
505  inlink->max_samples =
506  inlink->partial_buf_size = subframe_length;
507 
508  s->frame_type = INNER_FRAME;
509  break;
510 
511  case INNER_FRAME:
512  gain = gaussian_filter(s, s->index + 10 < 30 ? s->index + 10 : s->index + 10 - 30);
513  gain_next = gaussian_filter(s, s->index + 11 < 30 ? s->index + 11 : s->index + 11 - 30);
514 
515  for (n = 0; n < in->nb_samples; n++) {
516  for (c = 0; c < inlink->channels; c++) {
517  buf[s->prev_buf_index + c] = src[c];
518  limiter_buf[s->limiter_buf_index + c] = buf[s->buf_index + c] * (gain + (((double) n / in->nb_samples) * (gain_next - gain))) * s->offset;
519  }
520  src += inlink->channels;
521 
522  s->limiter_buf_index += inlink->channels;
523  if (s->limiter_buf_index >= s->limiter_buf_size)
524  s->limiter_buf_index -= s->limiter_buf_size;
525
526  s->prev_buf_index += inlink->channels;
527  if (s->prev_buf_index >= s->buf_size)
528  s->prev_buf_index -= s->buf_size;
529 
530  s->buf_index += inlink->channels;
531  if (s->buf_index >= s->buf_size)
532  s->buf_index -= s->buf_size;
533  }
534 
535  subframe_length = (frame_size(inlink->sample_rate, 100) - in->nb_samples) * inlink->channels;
536  s->limiter_buf_index = s->limiter_buf_index + subframe_length < s->limiter_buf_size ? s->limiter_buf_index + subframe_length : s->limiter_buf_index + subframe_length - s->limiter_buf_size;
537 
538  true_peak_limiter(s, dst, in->nb_samples, inlink->channels);
539  ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
540 
541  ebur128_loudness_range(s->r128_in, &lra);
542  ebur128_loudness_global(s->r128_in, &global);
543  ebur128_loudness_shortterm(s->r128_in, &shortterm);
544  ebur128_relative_threshold(s->r128_in, &relative_threshold);
545 
546  if (s->above_threshold == 0) {
547  double shortterm_out;
548 
549  if (shortterm > s->measured_thresh)
550  s->prev_delta *= 1.0058;
551 
552  ebur128_loudness_shortterm(s->r128_out, &shortterm_out);
553  if (shortterm_out >= s->target_i)
554  s->above_threshold = 1;
555  }
556 
557  if (shortterm < relative_threshold || shortterm <= -70. || s->above_threshold == 0) {
558  s->delta[s->index] = s->prev_delta;
559  } else {
560  env_global = fabs(shortterm - global) < (s->target_lra / 2.) ? shortterm - global : (s->target_lra / 2.) * ((shortterm - global) < 0 ? -1 : 1);
561  env_shortterm = s->target_i - shortterm;
562  s->delta[s->index] = pow(10., (env_global + env_shortterm) / 20.);
563  }
564 
565  s->prev_delta = s->delta[s->index];
566  s->index++;
567  if (s->index >= 30)
568  s->index -= 30;
569  s->prev_nb_samples = in->nb_samples;
570  s->pts += in->nb_samples;
571  break;
572 
573  case FINAL_FRAME:
574  gain = gaussian_filter(s, s->index + 10 < 30 ? s->index + 10 : s->index + 10 - 30);
575  s->limiter_buf_index = 0;
576  src_index = 0;
577 
578  for (n = 0; n < s->limiter_buf_size / inlink->channels; n++) {
579  for (c = 0; c < inlink->channels; c++) {
580  s->limiter_buf[s->limiter_buf_index + c] = src[src_index + c] * gain * s->offset;
581  }
582  src_index += inlink->channels;
583 
584  s->limiter_buf_index += inlink->channels;
585  if (s->limiter_buf_index >= s->limiter_buf_size)
586  s->limiter_buf_index -= s->limiter_buf_size;
587  }
588 
589  subframe_length = frame_size(inlink->sample_rate, 100);
590  for (i = 0; i < in->nb_samples / subframe_length; i++) {
591  true_peak_limiter(s, dst, subframe_length, inlink->channels);
592 
593  for (n = 0; n < subframe_length; n++) {
594  for (c = 0; c < inlink->channels; c++) {
595  if (src_index < (in->nb_samples * inlink->channels)) {
596  limiter_buf[s->limiter_buf_index + c] = src[src_index + c] * gain * s->offset;
597  } else {
598  limiter_buf[s->limiter_buf_index + c] = 0.;
599  }
600  }
601 
602  if (src_index < (in->nb_samples * inlink->channels))
603  src_index += inlink->channels;
604 
605  s->limiter_buf_index += inlink->channels;
606  if (s->limiter_buf_index >= s->limiter_buf_size)
607  s->limiter_buf_index -= s->limiter_buf_size;
608  }
609 
610  dst += (subframe_length * inlink->channels);
611  }
612 
613  dst = (double *)out->data[0];
614  ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
615  break;
616 
617  case LINEAR_MODE:
618  for (n = 0; n < in->nb_samples; n++) {
619  for (c = 0; c < inlink->channels; c++) {
620  dst[c] = src[c] * s->offset;
621  }
622  src += inlink->channels;
623  dst += inlink->channels;
624  }
625 
626  dst = (double *)out->data[0];
627  ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
628  s->pts += in->nb_samples;
629  break;
630  }
631 
632  if (in != out)
633  av_frame_free(&in);
634 
635  return ff_filter_frame(outlink, out);
636 }
637 
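/* At EOF the delay buffer still holds unprocessed audio: build one last frame
 * from it and push it through filter_frame() as FINAL_FRAME. */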
638 static int request_frame(AVFilterLink *outlink)
639 {
640  int ret;
641  AVFilterContext *ctx = outlink->src;
642  AVFilterLink *inlink = ctx->inputs[0];
643  LoudNormContext *s = ctx->priv;
644 
645  ret = ff_request_frame(inlink);
646  if (ret == AVERROR_EOF && s->frame_type == INNER_FRAME) {
647  double *src;
648  double *buf;
649  int nb_samples, n, c, offset;
650  AVFrame *frame;
651 
652  nb_samples = (s->buf_size / inlink->channels) - s->prev_nb_samples;
653  nb_samples -= (frame_size(inlink->sample_rate, 100) - s->prev_nb_samples);
654 
655  frame = ff_get_audio_buffer(outlink, nb_samples);
656  if (!frame)
657  return AVERROR(ENOMEM);
658  frame->nb_samples = nb_samples;
659 
660  buf = s->buf;
661  src = (double *)frame->data[0];
662 
663  offset = ((s->limiter_buf_size / inlink->channels) - s->prev_nb_samples) * inlink->channels;
664  offset -= (frame_size(inlink->sample_rate, 100) - s->prev_nb_samples) * inlink->channels;
665  s->buf_index = s->buf_index - offset < 0 ? s->buf_index - offset + s->buf_size : s->buf_index - offset;
666 
667  for (n = 0; n < nb_samples; n++) {
668  for (c = 0; c < inlink->channels; c++) {
669  src[c] = buf[s->buf_index + c];
670  }
671  src += inlink->channels;
672  s->buf_index += inlink->channels;
673  if (s->buf_index >= s->buf_size)
674  s->buf_index -= s->buf_size;
675  }
676 
677  s->frame_type = FINAL_FRAME;
678  ret = filter_frame(inlink, frame);
679  }
680  return ret;
681 }
682 
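/* The filter only operates on interleaved double samples at 192 kHz (so the
 * limiter's 1920-sample look-ahead is exactly 10 ms); with default format
 * negotiation, libavfilter inserts the required conversion and resampling in
 * front of it. */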
683 static int query_formats(AVFilterContext *ctx)
684 {
685  AVFilterFormats *formats;
686  AVFilterChannelLayouts *layouts;
687  AVFilterLink *inlink = ctx->inputs[0];
688  AVFilterLink *outlink = ctx->outputs[0];
689  static const int input_srate[] = {192000, -1};
690  static const enum AVSampleFormat sample_fmts[] = {
691  AV_SAMPLE_FMT_DBL,
692  AV_SAMPLE_FMT_NONE
693  };
694  int ret;
695 
696  layouts = ff_all_channel_counts();
697  if (!layouts)
698  return AVERROR(ENOMEM);
699  ret = ff_set_common_channel_layouts(ctx, layouts);
700  if (ret < 0)
701  return ret;
702 
703  formats = ff_make_format_list(sample_fmts);
704  if (!formats)
705  return AVERROR(ENOMEM);
706  ret = ff_set_common_formats(ctx, formats);
707  if (ret < 0)
708  return ret;
709 
710  formats = ff_make_format_list(input_srate);
711  if (!formats)
712  return AVERROR(ENOMEM);
713  ret = ff_formats_ref(formats, &inlink->out_samplerates);
714  if (ret < 0)
715  return ret;
716  ret = ff_formats_ref(formats, &outlink->in_samplerates);
717  if (ret < 0)
718  return ret;
719 
720  return 0;
721 }
722 
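/* Allocates the per-link state: two libebur128 analyzers (input and output),
 * the 3000 ms delay/analysis buffer and the 210 ms limiter look-ahead buffer,
 * optional dual-mono weighting, and decides up front from the measured_*
 * options whether the whole stream can be handled in LINEAR_MODE. offset and
 * target_tp are converted from dB to linear gain here. */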
723 static int config_input(AVFilterLink *inlink)
724 {
725  AVFilterContext *ctx = inlink->dst;
726  LoudNormContext *s = ctx->priv;
727 
728  s->r128_in = ebur128_init(inlink->channels, inlink->sample_rate, EBUR128_MODE_I | EBUR128_MODE_S | EBUR128_MODE_LRA | EBUR128_MODE_SAMPLE_PEAK);
729  if (!s->r128_in)
730  return AVERROR(ENOMEM);
731 
732  s->r128_out = ebur128_init(inlink->channels, inlink->sample_rate, EBUR128_MODE_I | EBUR128_MODE_S | EBUR128_MODE_LRA | EBUR128_MODE_SAMPLE_PEAK);
733  if (!s->r128_out)
734  return AVERROR(ENOMEM);
735 
736  if (inlink->channels == 1 && s->dual_mono) {
737  ebur128_set_channel(s->r128_in, 0, EBUR128_DUAL_MONO);
738  ebur128_set_channel(s->r128_out, 0, EBUR128_DUAL_MONO);
739  }
740 
741  s->buf_size = frame_size(inlink->sample_rate, 3000) * inlink->channels;
742  s->buf = av_malloc_array(s->buf_size, sizeof(*s->buf));
743  if (!s->buf)
744  return AVERROR(ENOMEM);
745 
746  s->limiter_buf_size = frame_size(inlink->sample_rate, 210) * inlink->channels;
747  s->limiter_buf = av_malloc_array(s->buf_size, sizeof(*s->limiter_buf));
748  if (!s->limiter_buf)
749  return AVERROR(ENOMEM);
750 
751  s->prev_smp = av_malloc_array(inlink->channels, sizeof(*s->prev_smp));
752  if (!s->prev_smp)
753  return AVERROR(ENOMEM);
754 
755  init_gaussian_filter(s);
756
757  s->frame_type = FIRST_FRAME;
758 
759  if (s->linear) {
760  double offset, offset_tp;
761  offset = s->target_i - s->measured_i;
762  offset_tp = s->measured_tp + offset;
763 
764  if (s->measured_tp != 99 && s->measured_thresh != -70 && s->measured_lra != 0 && s->measured_i != 0) {
765  if ((offset_tp <= s->target_tp) && (s->measured_lra <= s->target_lra)) {
766  s->frame_type = LINEAR_MODE;
767  s->offset = offset;
768  }
769  }
770  }
771 
772  if (s->frame_type != LINEAR_MODE) {
773  inlink->min_samples =
774  inlink->max_samples =
775  inlink->partial_buf_size = frame_size(inlink->sample_rate, 3000);
776  }
777 
778  s->pts =
779  s->buf_index =
780  s->prev_buf_index =
781  s->limiter_buf_index = 0;
782  s->channels = inlink->channels;
783  s->index = 1;
784  s->limiter_state = OUT;
785  s->offset = pow(10., s->offset / 20.);
786  s->target_tp = pow(10., s->target_tp / 20.);
787  s->attack_length = frame_size(inlink->sample_rate, 10);
788  s->release_length = frame_size(inlink->sample_rate, 100);
789 
790  return 0;
791 }
792 
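/* Prints the input/output loudness statistics in the selected print_format,
 * then frees the libebur128 states and the internal buffers. */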
793 static av_cold void uninit(AVFilterContext *ctx)
794 {
795  LoudNormContext *s = ctx->priv;
796  double i_in, i_out, lra_in, lra_out, thresh_in, thresh_out, tp_in, tp_out;
797  int c;
798 
799  if (!s->r128_in || !s->r128_out)
800  goto end;
801 
802  ebur128_loudness_range(s->r128_in, &lra_in);
803  ebur128_loudness_global(s->r128_in, &i_in);
804  ebur128_relative_threshold(s->r128_in, &thresh_in);
805  for (c = 0; c < s->channels; c++) {
806  double tmp;
807  ebur128_sample_peak(s->r128_in, c, &tmp);
808  if ((c == 0) || (tmp > tp_in))
809  tp_in = tmp;
810  }
811 
812  ebur128_loudness_range(s->r128_out, &lra_out);
813  ebur128_loudness_global(s->r128_out, &i_out);
814  ebur128_relative_threshold(s->r128_out, &thresh_out);
815  for (c = 0; c < s->channels; c++) {
816  double tmp;
817  ebur128_sample_peak(s->r128_out, c, &tmp);
818  if ((c == 0) || (tmp > tp_out))
819  tp_out = tmp;
820  }
821 
822  switch(s->print_format) {
823  case NONE:
824  break;
825 
826  case JSON:
827  av_log(ctx, AV_LOG_INFO,
828  "\n{\n"
829  "\t\"input_i\" : \"%.2f\",\n"
830  "\t\"input_tp\" : \"%.2f\",\n"
831  "\t\"input_lra\" : \"%.2f\",\n"
832  "\t\"input_thresh\" : \"%.2f\",\n"
833  "\t\"output_i\" : \"%.2f\",\n"
834  "\t\"output_tp\" : \"%+.2f\",\n"
835  "\t\"output_lra\" : \"%.2f\",\n"
836  "\t\"output_thresh\" : \"%.2f\",\n"
837  "\t\"normalization_type\" : \"%s\",\n"
838  "\t\"target_offset\" : \"%.2f\"\n"
839  "}\n",
840  i_in,
841  20. * log10(tp_in),
842  lra_in,
843  thresh_in,
844  i_out,
845  20. * log10(tp_out),
846  lra_out,
847  thresh_out,
848  s->frame_type == LINEAR_MODE ? "linear" : "dynamic",
849  s->target_i - i_out
850  );
851  break;
852 
853  case SUMMARY:
854  av_log(ctx, AV_LOG_INFO,
855  "\n"
856  "Input Integrated: %+6.1f LUFS\n"
857  "Input True Peak: %+6.1f dBTP\n"
858  "Input LRA: %6.1f LU\n"
859  "Input Threshold: %+6.1f LUFS\n"
860  "\n"
861  "Output Integrated: %+6.1f LUFS\n"
862  "Output True Peak: %+6.1f dBTP\n"
863  "Output LRA: %6.1f LU\n"
864  "Output Threshold: %+6.1f LUFS\n"
865  "\n"
866  "Normalization Type: %s\n"
867  "Target Offset: %+6.1f LU\n",
868  i_in,
869  20. * log10(tp_in),
870  lra_in,
871  thresh_in,
872  i_out,
873  20. * log10(tp_out),
874  lra_out,
875  thresh_out,
876  s->frame_type == LINEAR_MODE ? "Linear" : "Dynamic",
877  s->target_i - i_out
878  );
879  break;
880  }
881 
882 end:
883  if (s->r128_in)
884  ebur128_destroy(&s->r128_in);
885  if (s->r128_out)
886  ebur128_destroy(&s->r128_out);
887  av_freep(&s->limiter_buf);
888  av_freep(&s->prev_smp);
889  av_freep(&s->buf);
890 }
891 
892 static const AVFilterPad avfilter_af_loudnorm_inputs[] = {
893  {
894  .name = "default",
895  .type = AVMEDIA_TYPE_AUDIO,
896  .config_props = config_input,
897  .filter_frame = filter_frame,
898  },
899  { NULL }
900 };
901 
902 static const AVFilterPad avfilter_af_loudnorm_outputs[] = {
903  {
904  .name = "default",
905  .request_frame = request_frame,
906  .type = AVMEDIA_TYPE_AUDIO,
907  },
908  { NULL }
909 };
910 
912  .name = "loudnorm",
913  .description = NULL_IF_CONFIG_SMALL("EBU R128 loudness normalization"),
914  .priv_size = sizeof(LoudNormContext),
915  .priv_class = &loudnorm_class,
916  .query_formats = query_formats,
917  .uninit = uninit,
918  .inputs = avfilter_af_loudnorm_inputs,
919  .outputs = avfilter_af_loudnorm_outputs,
920 };