FFmpeg
af_speechnorm.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020 Paul B Mahol
3  *
4  * Speech Normalizer
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 /**
24  * @file
25  * Speech Normalizer
26  */
27 
28 #include <float.h>
29 
30 #include "libavutil/avassert.h"
32 #include "libavutil/mem.h"
33 #include "libavutil/opt.h"
34 
35 #define FF_BUFQUEUE_SIZE (1024)
36 #include "bufferqueue.h"
37 
38 #include "audio.h"
39 #include "avfilter.h"
40 #include "filters.h"
41 #include "internal.h"
42 
/* Ring-buffer capacity of PeriodItem entries per channel (882000 = 20 * 44100,
 * i.e. 20 seconds of half-periods at 44.1 kHz worst case of one entry per sample). */
#define MAX_ITEMS  882000
/* Smallest peak treated as non-silence: one LSB of 16-bit audio. */
#define MIN_PEAK (1. / 32768.)
45 
/* One half-period of the waveform (a run of samples on one side of zero). */
typedef struct PeriodItem {
    int size;        // number of samples accumulated into this half-period
    int type;        // 0 while still being filled, 1 once the period is closed
    double max_peak; // maximum absolute sample value seen in the period
    double rms_sum;  // running sum of squared samples (for RMS computation)
} PeriodItem;
52 
53 typedef struct ChannelContext {
54  int state;
55  int bypass;
57  double gain_state;
58  double pi_max_peak;
59  double pi_rms_sum;
60  int pi_start;
61  int pi_end;
62  int pi_size;
64 
65 typedef struct SpeechNormalizerContext {
66  const AVClass *class;
67 
68  double rms_value;
69  double peak_value;
70  double max_expansion;
73  double raise_amount;
74  double fall_amount;
77  int invert;
78  int link;
79 
81  double prev_gain;
82 
84  int eof;
85  int64_t pts;
86 
87  struct FFBufQueue queue;
88 
90  const uint8_t *srcp, int nb_samples);
92  AVFrame *in, AVFrame *out, int nb_samples);
94 
#define OFFSET(x) offsetof(SpeechNormalizerContext, x)
#define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM

/* Filter options. Each option has a long name and a one-letter alias that map
 * to the same field; all are runtime-adjustable (AV_OPT_FLAG_RUNTIME_PARAM). */
static const AVOption speechnorm_options[] = {
    { "peak", "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0.0, 1.0, FLAGS },
    { "p", "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0.0, 1.0, FLAGS },
    { "expansion", "set the max expansion factor", OFFSET(max_expansion), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
    { "e", "set the max expansion factor", OFFSET(max_expansion), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
    { "compression", "set the max compression factor", OFFSET(max_compression), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
    { "c", "set the max compression factor", OFFSET(max_compression), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
    { "threshold", "set the threshold value", OFFSET(threshold_value), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0.0, 1.0, FLAGS },
    { "t", "set the threshold value", OFFSET(threshold_value), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0.0, 1.0, FLAGS },
    { "raise", "set the expansion raising amount", OFFSET(raise_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
    { "r", "set the expansion raising amount", OFFSET(raise_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
    { "fall", "set the compression raising amount", OFFSET(fall_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
    { "f", "set the compression raising amount", OFFSET(fall_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
    { "channels", "set channels to filter", OFFSET(ch_layout_str), AV_OPT_TYPE_STRING, {.str="all"}, 0, 0, FLAGS },
    { "h", "set channels to filter", OFFSET(ch_layout_str), AV_OPT_TYPE_STRING, {.str="all"}, 0, 0, FLAGS },
    { "invert", "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
    { "i", "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
    { "link", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
    { "l", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
    { "rms", "set the RMS value", OFFSET(rms_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.0}, 0.0, 1.0, FLAGS },
    { "m", "set the RMS value", OFFSET(rms_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.0}, 0.0, 1.0, FLAGS },
    { NULL }
};

AVFILTER_DEFINE_CLASS(speechnorm);
123 
124 static int get_pi_samples(PeriodItem *pi, int start, int end, int remain)
125 {
126  int sum;
127 
128  if (pi[start].type == 0)
129  return remain;
130 
131  sum = remain;
132  while (start != end) {
133  start++;
134  if (start >= MAX_ITEMS)
135  start = 0;
136  if (pi[start].type == 0)
137  break;
138  av_assert1(pi[start].size > 0);
139  sum += pi[start].size;
140  }
141 
142  return sum;
143 }
144 
146 {
147  SpeechNormalizerContext *s = ctx->priv;
148  AVFilterLink *inlink = ctx->inputs[0];
149  int min_pi_nb_samples;
150 
151  min_pi_nb_samples = get_pi_samples(s->cc[0].pi, s->cc[0].pi_start, s->cc[0].pi_end, s->cc[0].pi_size);
152  for (int ch = 1; ch < inlink->ch_layout.nb_channels && min_pi_nb_samples > 0; ch++) {
153  ChannelContext *cc = &s->cc[ch];
154 
155  min_pi_nb_samples = FFMIN(min_pi_nb_samples, get_pi_samples(cc->pi, cc->pi_start, cc->pi_end, cc->pi_size));
156  }
157 
158  return min_pi_nb_samples;
159 }
160 
161 static void consume_pi(ChannelContext *cc, int nb_samples)
162 {
163  if (cc->pi_size >= nb_samples) {
164  cc->pi_size -= nb_samples;
165  } else {
166  av_assert1(0);
167  }
168 }
169 
170 static double next_gain(AVFilterContext *ctx, double pi_max_peak, int bypass, double state,
171  double pi_rms_sum, int pi_size)
172 {
173  SpeechNormalizerContext *s = ctx->priv;
174  const double compression = 1. / s->max_compression;
175  const int type = s->invert ? pi_max_peak <= s->threshold_value : pi_max_peak >= s->threshold_value;
176  double expansion = FFMIN(s->max_expansion, s->peak_value / pi_max_peak);
177 
178  if (s->rms_value > DBL_EPSILON)
179  expansion = FFMIN(expansion, s->rms_value / sqrt(pi_rms_sum / pi_size));
180 
181  if (bypass) {
182  return 1.;
183  } else if (type) {
184  return FFMIN(expansion, state + s->raise_amount);
185  } else {
186  return FFMIN(expansion, FFMAX(compression, state - s->fall_amount));
187  }
188 }
189 
/**
 * Advance the channel to its next analyzed period once the current one has
 * been fully consumed (pi_size == 0): load that period's size/peak/RMS into
 * the channel state, advance the ring-buffer start index, and update the
 * channel gain via next_gain().
 */
static void next_pi(AVFilterContext *ctx, ChannelContext *cc, int bypass)
{
    av_assert1(cc->pi_size >= 0);
    if (cc->pi_size == 0) {
        SpeechNormalizerContext *s = ctx->priv;
        int start = cc->pi_start;

        av_assert1(cc->pi[start].size > 0);
        /* Only at EOF may an unfinished (type == 0) period be consumed. */
        av_assert0(cc->pi[start].type > 0 || s->eof);
        cc->pi_size = cc->pi[start].size;
        cc->pi_rms_sum = cc->pi[start].rms_sum;
        cc->pi_max_peak = cc->pi[start].max_peak;
        av_assert1(cc->pi_start != cc->pi_end || s->eof);
        start++;
        if (start >= MAX_ITEMS) /* wrap around the ring buffer */
            start = 0;
        cc->pi_start = start;
        cc->gain_state = next_gain(ctx, cc->pi_max_peak, bypass, cc->gain_state,
                                   cc->pi_rms_sum, cc->pi_size);
    }
}
211 
212 static double min_gain(AVFilterContext *ctx, ChannelContext *cc, int max_size)
213 {
214  SpeechNormalizerContext *s = ctx->priv;
215  double min_gain = s->max_expansion;
216  double gain_state = cc->gain_state;
217  int size = cc->pi_size;
218  int idx = cc->pi_start;
219 
220  min_gain = FFMIN(min_gain, gain_state);
221  while (size <= max_size) {
222  if (idx == cc->pi_end)
223  break;
224  gain_state = next_gain(ctx, cc->pi[idx].max_peak, 0, gain_state,
225  cc->pi[idx].rms_sum, cc->pi[idx].size);
226  min_gain = FFMIN(min_gain, gain_state);
227  size += cc->pi[idx].size;
228  idx++;
229  if (idx >= MAX_ITEMS)
230  idx = 0;
231  }
232 
233  return min_gain;
234 }
235 
/**
 * Per-sample-format analysis pass: split the incoming samples of one channel
 * into half-periods (runs on one side of zero), recording each run's length,
 * absolute peak and sum of squares into the channel's PeriodItem ring buffer.
 * A run is closed when the sign changes or it exceeds max_period samples;
 * runs whose peak never reaches min_peak are merged into the next run of the
 * same sign rather than closed.
 */
#define ANALYZE_CHANNEL(name, ptype, zero, min_peak) \
static void analyze_channel_## name (AVFilterContext *ctx, ChannelContext *cc, \
                                     const uint8_t *srcp, int nb_samples) \
{ \
    SpeechNormalizerContext *s = ctx->priv; \
    const ptype *src = (const ptype *)srcp; \
    const int max_period = s->max_period; \
    PeriodItem *pi = (PeriodItem *)&cc->pi; \
    int pi_end = cc->pi_end; \
    int n = 0; \
 \
    if (cc->state < 0) \
        cc->state = src[0] >= zero; \
 \
    while (n < nb_samples) { \
        ptype new_max_peak; \
        ptype new_rms_sum; \
        int new_size; \
 \
        if ((cc->state != (src[n] >= zero)) || \
            (pi[pi_end].size > max_period)) { \
            ptype max_peak = pi[pi_end].max_peak; \
            ptype rms_sum = pi[pi_end].rms_sum; \
            int state = cc->state; \
 \
            cc->state = src[n] >= zero; \
            av_assert1(pi[pi_end].size > 0); \
            if (max_peak >= min_peak || \
                pi[pi_end].size > max_period) { \
                pi[pi_end].type = 1; \
                pi_end++; \
                if (pi_end >= MAX_ITEMS) \
                    pi_end = 0; \
                if (cc->state != state) { \
                    pi[pi_end].max_peak = DBL_MIN; \
                    pi[pi_end].rms_sum = 0.0; \
                } else { \
                    pi[pi_end].max_peak = max_peak; \
                    pi[pi_end].rms_sum = rms_sum; \
                } \
                pi[pi_end].type = 0; \
                pi[pi_end].size = 0; \
                av_assert1(pi_end != cc->pi_start); \
            } \
        } \
 \
        new_max_peak = pi[pi_end].max_peak; \
        new_rms_sum = pi[pi_end].rms_sum; \
        new_size = pi[pi_end].size; \
        if (cc->state) { \
            while (src[n] >= zero) { \
                new_max_peak = FFMAX(new_max_peak, src[n]); \
                new_rms_sum += src[n] * src[n]; \
                new_size++; \
                n++; \
                if (n >= nb_samples) \
                    break; \
            } \
        } else { \
            while (src[n] < zero) { \
                new_max_peak = FFMAX(new_max_peak, -src[n]); \
                new_rms_sum += src[n] * src[n]; \
                new_size++; \
                n++; \
                if (n >= nb_samples) \
                    break; \
            } \
        } \
 \
        pi[pi_end].max_peak = new_max_peak; \
        pi[pi_end].rms_sum = new_rms_sum; \
        pi[pi_end].size = new_size; \
    } \
    cc->pi_end = pi_end; \
}

ANALYZE_CHANNEL(dbl, double, 0.0, MIN_PEAK)
ANALYZE_CHANNEL(flt, float, 0.f, (float)MIN_PEAK)
314 
/**
 * Per-sample-format independent-channel filtering pass: for every channel,
 * walk the analyzed periods and multiply each period's samples by that
 * channel's current gain state. Channels not selected by the "channels"
 * option are bypassed (their gain is forced to 1.0 by next_gain()).
 * When the filter is disabled on the timeline, samples are left untouched.
 */
#define FILTER_CHANNELS(name, ptype) \
static void filter_channels_## name (AVFilterContext *ctx, \
                                     AVFrame *in, AVFrame *out, int nb_samples) \
{ \
    SpeechNormalizerContext *s = ctx->priv; \
    AVFilterLink *inlink = ctx->inputs[0]; \
 \
    for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { \
        ChannelContext *cc = &s->cc[ch]; \
        const ptype *src = (const ptype *)in->extended_data[ch]; \
        ptype *dst = (ptype *)out->extended_data[ch]; \
        enum AVChannel channel = av_channel_layout_channel_from_index(&inlink->ch_layout, ch); \
        const int bypass = av_channel_layout_index_from_channel(&s->ch_layout, channel) < 0; \
        int n = 0; \
 \
        while (n < nb_samples) { \
            ptype gain; \
            int size; \
 \
            next_pi(ctx, cc, bypass); \
            size = FFMIN(nb_samples - n, cc->pi_size); \
            av_assert1(size > 0); \
            gain = cc->gain_state; \
            consume_pi(cc, size); \
            for (int i = n; !ctx->is_disabled && i < n + size; i++) \
                dst[i] = src[i] * gain; \
            n += size; \
        } \
    } \
}

FILTER_CHANNELS(dbl, double)
FILTER_CHANNELS(flt, float)
348 
/* Linear interpolation between min and max by factor mix in [0, 1] (double). */
static double dlerp(double min, double max, double mix)
{
    const double span = max - min;

    return min + span * mix;
}
353 
/* Linear interpolation between min and max by factor mix in [0, 1] (float). */
static float flerp(float min, float max, float mix)
{
    const float span = max - min;

    return min + span * mix;
}
358 
/**
 * Per-sample-format linked-channel filtering pass: all selected channels are
 * driven by one common gain, the minimum over every channel's look-ahead
 * (min_gain()) for the current chunk. The gain is linearly interpolated from
 * the previous chunk's gain (s->prev_gain) across the chunk via tlerp to
 * avoid steps. Bypassed channels still consume their periods but keep their
 * samples unchanged; when the filter is timeline-disabled no samples are
 * written.
 */
#define FILTER_LINK_CHANNELS(name, ptype, tlerp) \
static void filter_link_channels_## name (AVFilterContext *ctx, \
                                          AVFrame *in, AVFrame *out, \
                                          int nb_samples) \
{ \
    SpeechNormalizerContext *s = ctx->priv; \
    AVFilterLink *inlink = ctx->inputs[0]; \
    int n = 0; \
 \
    while (n < nb_samples) { \
        int min_size = nb_samples - n; \
        ptype gain = s->max_expansion; \
 \
        for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { \
            ChannelContext *cc = &s->cc[ch]; \
 \
            enum AVChannel channel = av_channel_layout_channel_from_index(&inlink->ch_layout, ch); \
            cc->bypass = av_channel_layout_index_from_channel(&s->ch_layout, channel) < 0; \
 \
            next_pi(ctx, cc, cc->bypass); \
            min_size = FFMIN(min_size, cc->pi_size); \
        } \
 \
        av_assert1(min_size > 0); \
        for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { \
            ChannelContext *cc = &s->cc[ch]; \
 \
            if (cc->bypass) \
                continue; \
            gain = FFMIN(gain, min_gain(ctx, cc, min_size)); \
        } \
 \
        for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { \
            ChannelContext *cc = &s->cc[ch]; \
            const ptype *src = (const ptype *)in->extended_data[ch]; \
            ptype *dst = (ptype *)out->extended_data[ch]; \
 \
            consume_pi(cc, min_size); \
            if (cc->bypass) \
                continue; \
 \
            for (int i = n; !ctx->is_disabled && i < n + min_size; i++) { \
                ptype g = tlerp(s->prev_gain, gain, (i - n) / (ptype)min_size); \
                dst[i] = src[i] * g; \
            } \
        } \
 \
        s->prev_gain = gain; \
        n += min_size; \
    } \
}

FILTER_LINK_CHANNELS(dbl, double, dlerp)
FILTER_LINK_CHANNELS(flt, float, flerp)
413 
415 {
416  SpeechNormalizerContext *s = ctx->priv;
417  AVFilterLink *outlink = ctx->outputs[0];
418  AVFilterLink *inlink = ctx->inputs[0];
419  int ret;
420 
421  while (s->queue.available > 0) {
422  int min_pi_nb_samples;
423  AVFrame *in, *out;
424 
425  in = ff_bufqueue_peek(&s->queue, 0);
426  if (!in)
427  break;
428 
429  min_pi_nb_samples = available_samples(ctx);
430  if (min_pi_nb_samples < in->nb_samples && !s->eof)
431  break;
432 
433  in = ff_bufqueue_get(&s->queue);
434 
435  if (av_frame_is_writable(in)) {
436  out = in;
437  } else {
438  out = ff_get_audio_buffer(outlink, in->nb_samples);
439  if (!out) {
440  av_frame_free(&in);
441  return AVERROR(ENOMEM);
442  }
444  }
445 
446  s->filter_channels[s->link](ctx, in, out, in->nb_samples);
447 
448  s->pts = in->pts + av_rescale_q(in->nb_samples, av_make_q(1, outlink->sample_rate),
449  outlink->time_base);
450 
451  if (out != in)
452  av_frame_free(&in);
453  return ff_filter_frame(outlink, out);
454  }
455 
456  for (int f = 0; f < ff_inlink_queued_frames(inlink); f++) {
457  AVFrame *in;
458 
460  if (ret < 0)
461  return ret;
462  if (ret == 0)
463  break;
464 
465  ff_bufqueue_add(ctx, &s->queue, in);
466 
467  for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
468  ChannelContext *cc = &s->cc[ch];
469 
470  s->analyze_channel(ctx, cc, in->extended_data[ch], in->nb_samples);
471  }
472  }
473 
474  return 1;
475 }
476 
478 {
479  AVFilterLink *inlink = ctx->inputs[0];
480  AVFilterLink *outlink = ctx->outputs[0];
481  SpeechNormalizerContext *s = ctx->priv;
482  int ret, status;
483  int64_t pts;
484 
485  ret = av_channel_layout_copy(&s->ch_layout, &inlink->ch_layout);
486  if (ret < 0)
487  return ret;
488  if (strcmp(s->ch_layout_str, "all"))
489  av_channel_layout_from_string(&s->ch_layout,
490  s->ch_layout_str);
491 
493 
494  ret = filter_frame(ctx);
495  if (ret <= 0)
496  return ret;
497 
498  if (!s->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {
499  if (status == AVERROR_EOF)
500  s->eof = 1;
501  }
502 
503  if (s->eof && ff_inlink_queued_samples(inlink) == 0 &&
504  s->queue.available == 0) {
505  ff_outlink_set_status(outlink, AVERROR_EOF, s->pts);
506  return 0;
507  }
508 
509  if (s->queue.available > 0) {
510  AVFrame *in = ff_bufqueue_peek(&s->queue, 0);
511  const int nb_samples = available_samples(ctx);
512 
513  if (nb_samples >= in->nb_samples || s->eof) {
515  return 0;
516  }
517  }
518 
520 
521  return FFERROR_NOT_READY;
522 }
523 
525 {
526  AVFilterContext *ctx = inlink->dst;
527  SpeechNormalizerContext *s = ctx->priv;
528 
529  s->max_period = inlink->sample_rate / 10;
530 
531  s->prev_gain = 1.;
532  s->cc = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->cc));
533  if (!s->cc)
534  return AVERROR(ENOMEM);
535 
536  for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
537  ChannelContext *cc = &s->cc[ch];
538 
539  cc->state = -1;
540  cc->gain_state = s->max_expansion;
541  }
542 
543  switch (inlink->format) {
544  case AV_SAMPLE_FMT_FLTP:
545  s->analyze_channel = analyze_channel_flt;
546  s->filter_channels[0] = filter_channels_flt;
547  s->filter_channels[1] = filter_link_channels_flt;
548  break;
549  case AV_SAMPLE_FMT_DBLP:
550  s->analyze_channel = analyze_channel_dbl;
551  s->filter_channels[0] = filter_channels_dbl;
552  s->filter_channels[1] = filter_link_channels_dbl;
553  break;
554  default:
555  av_assert1(0);
556  }
557 
558  return 0;
559 }
560 
561 static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
562  char *res, int res_len, int flags)
563 {
564  SpeechNormalizerContext *s = ctx->priv;
565  int link = s->link;
566  int ret;
567 
568  ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags);
569  if (ret < 0)
570  return ret;
571  if (link != s->link)
572  s->prev_gain = 1.;
573 
574  return 0;
575 }
576 
578 {
579  SpeechNormalizerContext *s = ctx->priv;
580 
581  ff_bufqueue_discard_all(&s->queue);
582  av_channel_layout_uninit(&s->ch_layout);
583  av_freep(&s->cc);
584 }
585 
/* Single audio input pad; per-link setup is done in config_input(). */
static const AVFilterPad inputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_AUDIO,
        .config_props = config_input,
    },
};
593 
595  .name = "speechnorm",
596  .description = NULL_IF_CONFIG_SMALL("Speech Normalizer."),
597  .priv_size = sizeof(SpeechNormalizerContext),
598  .priv_class = &speechnorm_class,
599  .activate = activate,
600  .uninit = uninit,
605  .process_command = process_command,
606 };
inputs
static const AVFilterPad inputs[]
Definition: af_speechnorm.c:586
ff_get_audio_buffer
AVFrame * ff_get_audio_buffer(AVFilterLink *link, int nb_samples)
Request an audio samples buffer with a specific set of permissions.
Definition: audio.c:97
AV_SAMPLE_FMT_FLTP
@ AV_SAMPLE_FMT_FLTP
float, planar
Definition: samplefmt.h:66
get_pi_samples
static int get_pi_samples(PeriodItem *pi, int start, int end, int remain)
Definition: af_speechnorm.c:124
mix
static int mix(int c0, int c1)
Definition: 4xm.c:716
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
out
FILE * out
Definition: movenc.c:55
consume_pi
static void consume_pi(ChannelContext *cc, int nb_samples)
Definition: af_speechnorm.c:161
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1015
PeriodItem::type
int type
Definition: af_speechnorm.c:48
AVERROR_EOF
#define AVERROR_EOF
End of file.
Definition: error.h:57
FFERROR_NOT_READY
return FFERROR_NOT_READY
Definition: filter_design.txt:204
process_command
static int process_command(AVFilterContext *ctx, const char *cmd, const char *args, char *res, int res_len, int flags)
Definition: af_speechnorm.c:561
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:160
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:374
SpeechNormalizerContext::ch_layout
AVChannelLayout ch_layout
Definition: af_speechnorm.c:76
AVOption
AVOption.
Definition: opt.h:346
PeriodItem
Definition: af_speechnorm.c:46
SpeechNormalizerContext::peak_value
double peak_value
Definition: af_speechnorm.c:69
float.h
SpeechNormalizerContext::filter_channels
void(* filter_channels[2])(AVFilterContext *ctx, AVFrame *in, AVFrame *out, int nb_samples)
Definition: af_speechnorm.c:91
max
#define max(a, b)
Definition: cuda_runtime.h:33
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:170
ChannelContext::gain_state
double gain_state
Definition: af_speechnorm.c:57
next_gain
static double next_gain(AVFilterContext *ctx, double pi_max_peak, int bypass, double state, double pi_rms_sum, int pi_size)
Definition: af_speechnorm.c:170
FF_FILTER_FORWARD_STATUS_BACK
#define FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink)
Forward the status on an output link to an input link.
Definition: filters.h:199
SpeechNormalizerContext::link
int link
Definition: af_speechnorm.c:78
MAX_ITEMS
#define MAX_ITEMS
Definition: af_speechnorm.c:43
ff_bufqueue_get
static AVFrame * ff_bufqueue_get(struct FFBufQueue *queue)
Get the first buffer from the queue and remove it.
Definition: bufferqueue.h:98
SpeechNormalizerContext::threshold_value
double threshold_value
Definition: af_speechnorm.c:72
ff_inlink_consume_frame
int ff_inlink_consume_frame(AVFilterLink *link, AVFrame **rframe)
Take a frame from the link's FIFO and update the link's stats.
Definition: avfilter.c:1442
dlerp
static double dlerp(double min, double max, double mix)
Definition: af_speechnorm.c:349
type
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf type
Definition: writing_filters.txt:86
pts
static int64_t pts
Definition: transcode_aac.c:644
AVFILTER_DEFINE_CLASS
AVFILTER_DEFINE_CLASS(speechnorm)
AVFilterPad
A filter pad used for either input or output.
Definition: internal.h:33
SpeechNormalizerContext::rms_value
double rms_value
Definition: af_speechnorm.c:68
avassert.h
av_cold
#define av_cold
Definition: attributes.h:90
SpeechNormalizerContext::eof
int eof
Definition: af_speechnorm.c:84
ff_outlink_set_status
static void ff_outlink_set_status(AVFilterLink *link, int status, int64_t pts)
Set the status field of a link from the source filter.
Definition: filters.h:189
ChannelContext::pi_rms_sum
double pi_rms_sum
Definition: af_speechnorm.c:59
s
#define s(width, name)
Definition: cbs_vp9.c:198
available_samples
static int available_samples(AVFilterContext *ctx)
Definition: af_speechnorm.c:145
SpeechNormalizerContext::raise_amount
double raise_amount
Definition: af_speechnorm.c:73
AV_OPT_TYPE_DOUBLE
@ AV_OPT_TYPE_DOUBLE
Definition: opt.h:237
AVMEDIA_TYPE_AUDIO
@ AVMEDIA_TYPE_AUDIO
Definition: avutil.h:202
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:40
filters.h
ctx
AVFormatContext * ctx
Definition: movenc.c:49
av_rescale_q
int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq)
Rescale a 64-bit integer by 2 rational numbers.
Definition: mathematics.c:142
SpeechNormalizerContext::cc
ChannelContext * cc
Definition: af_speechnorm.c:80
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: internal.h:182
link
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a link
Definition: filter_design.txt:23
min_gain
static double min_gain(AVFilterContext *ctx, ChannelContext *cc, int max_size)
Definition: af_speechnorm.c:212
AVClass
Describe the class of an AVClass context structure.
Definition: log.h:66
NULL
#define NULL
Definition: coverity.c:32
av_frame_copy_props
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:709
state
static struct @414 state
ff_bufqueue_discard_all
static void ff_bufqueue_discard_all(struct FFBufQueue *queue)
Unref and remove all buffers from the queue.
Definition: bufferqueue.h:111
ChannelContext::pi_max_peak
double pi_max_peak
Definition: af_speechnorm.c:58
ff_audio_default_filterpad
const AVFilterPad ff_audio_default_filterpad[1]
An AVFilterPad array whose only entry has name "default" and is of type AVMEDIA_TYPE_AUDIO.
Definition: audio.c:33
PeriodItem::size
int size
Definition: af_speechnorm.c:47
ChannelContext::pi_end
int pi_end
Definition: af_speechnorm.c:61
ff_inlink_acknowledge_status
int ff_inlink_acknowledge_status(AVFilterLink *link, int *rstatus, int64_t *rpts)
Test and acknowledge the change of status on the link.
Definition: avfilter.c:1389
ff_inlink_queued_frames
size_t ff_inlink_queued_frames(AVFilterLink *link)
Get the number of frames available on the link.
Definition: avfilter.c:1405
bufferqueue.h
f
f
Definition: af_crystalizer.c:121
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:94
AVChannelLayout
An AVChannelLayout holds information about the channel layout of audio data.
Definition: channel_layout.h:303
size
int size
Definition: twinvq_data.h:10344
av_make_q
static AVRational av_make_q(int num, int den)
Create an AVRational.
Definition: rational.h:71
av_frame_is_writable
int av_frame_is_writable(AVFrame *frame)
Check if the frame data is writable.
Definition: frame.c:645
SpeechNormalizerContext::max_period
int max_period
Definition: af_speechnorm.c:83
ff_filter_process_command
int ff_filter_process_command(AVFilterContext *ctx, const char *cmd, const char *arg, char *res, int res_len, int flags)
Generic processing of user supplied commands that are set in the same way as the filter options.
Definition: avfilter.c:887
FF_FILTER_FORWARD_WANTED
FF_FILTER_FORWARD_WANTED(outlink, inlink)
ff_bufqueue_add
static void ff_bufqueue_add(void *log, struct FFBufQueue *queue, AVFrame *buf)
Add a buffer to the queue.
Definition: bufferqueue.h:71
FILTER_CHANNELS
#define FILTER_CHANNELS(name, ptype)
Definition: af_speechnorm.c:315
SpeechNormalizerContext
Definition: af_speechnorm.c:65
internal.h
av_channel_layout_from_string
int av_channel_layout_from_string(AVChannelLayout *channel_layout, const char *str)
Initialize a channel layout from a given string description.
Definition: channel_layout.c:303
AVFrame::nb_samples
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:454
ff_bufqueue_peek
static AVFrame * ff_bufqueue_peek(struct FFBufQueue *queue, unsigned index)
Get a buffer from the queue without altering it.
Definition: bufferqueue.h:87
FFBufQueue
Structure holding the queue.
Definition: bufferqueue.h:49
invert
static void invert(float *h, int n)
Definition: asrc_sinc.c:185
AVFrame::extended_data
uint8_t ** extended_data
pointers to the data planes/channels.
Definition: frame.h:435
av_assert1
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
Definition: avassert.h:56
FLAGS
#define FLAGS
Definition: af_speechnorm.c:96
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
SpeechNormalizerContext::invert
int invert
Definition: af_speechnorm.c:77
SpeechNormalizerContext::queue
struct FFBufQueue queue
Definition: af_speechnorm.c:87
AVFilterPad::name
const char * name
Pad name.
Definition: internal.h:39
ff_inlink_queued_samples
int ff_inlink_queued_samples(AVFilterLink *link)
Definition: avfilter.c:1417
ANALYZE_CHANNEL
#define ANALYZE_CHANNEL(name, ptype, zero, min_peak)
Definition: af_speechnorm.c:236
ChannelContext::pi_size
int pi_size
Definition: af_speechnorm.c:62
av_calloc
void * av_calloc(size_t nmemb, size_t size)
Definition: mem.c:264
SpeechNormalizerContext::fall_amount
double fall_amount
Definition: af_speechnorm.c:74
activate
static int activate(AVFilterContext *ctx)
Definition: af_speechnorm.c:477
AVFilter
Filter definition.
Definition: avfilter.h:166
ret
ret
Definition: filter_design.txt:187
PeriodItem::max_peak
double max_peak
Definition: af_speechnorm.c:49
MIN_PEAK
#define MIN_PEAK
Definition: af_speechnorm.c:44
ChannelContext::bypass
int bypass
Definition: af_speechnorm.c:55
uninit
static av_cold void uninit(AVFilterContext *ctx)
Definition: af_speechnorm.c:577
config_input
static int config_input(AVFilterLink *inlink)
Definition: af_speechnorm.c:524
status
ov_status_e status
Definition: dnn_backend_openvino.c:121
channel_layout.h
next_pi
static void next_pi(AVFilterContext *ctx, ChannelContext *cc, int bypass)
Definition: af_speechnorm.c:190
avfilter.h
av_channel_layout_uninit
void av_channel_layout_uninit(AVChannelLayout *channel_layout)
Free any allocated data in the channel layout and reset the channel count to 0.
Definition: channel_layout.c:433
ChannelContext::pi_start
int pi_start
Definition: af_speechnorm.c:60
AV_SAMPLE_FMT_DBLP
@ AV_SAMPLE_FMT_DBLP
double, planar
Definition: samplefmt.h:67
ChannelContext
Definition: hcadec.c:35
ChannelContext::state
int state
Definition: af_speechnorm.c:54
SpeechNormalizerContext::max_compression
double max_compression
Definition: af_speechnorm.c:71
ChannelContext::pi
PeriodItem pi[MAX_ITEMS]
Definition: af_speechnorm.c:56
AVFilterContext
An instance of a filter.
Definition: avfilter.h:407
av_channel_layout_copy
int av_channel_layout_copy(AVChannelLayout *dst, const AVChannelLayout *src)
Make a copy of a channel layout.
Definition: channel_layout.c:440
filter_frame
static int filter_frame(AVFilterContext *ctx)
Definition: af_speechnorm.c:414
FILTER_LINK_CHANNELS
#define FILTER_LINK_CHANNELS(name, ptype, tlerp)
Definition: af_speechnorm.c:359
flerp
static float flerp(float min, float max, float mix)
Definition: af_speechnorm.c:354
mem.h
audio.h
PeriodItem::rms_sum
double rms_sum
Definition: af_speechnorm.c:50
AV_OPT_TYPE_BOOL
@ AV_OPT_TYPE_BOOL
Definition: opt.h:251
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: internal.h:183
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
SpeechNormalizerContext::ch_layout_str
char * ch_layout_str
Definition: af_speechnorm.c:75
AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL
#define AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL
Same as AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, except that the filter will have its filter_frame() c...
Definition: avfilter.h:155
OFFSET
#define OFFSET(x)
Definition: af_speechnorm.c:95
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:474
AV_OPT_TYPE_STRING
@ AV_OPT_TYPE_STRING
Definition: opt.h:239
SpeechNormalizerContext::pts
int64_t pts
Definition: af_speechnorm.c:85
SpeechNormalizerContext::max_expansion
double max_expansion
Definition: af_speechnorm.c:70
speechnorm_options
static const AVOption speechnorm_options[]
Definition: af_speechnorm.c:98
SpeechNormalizerContext::analyze_channel
void(* analyze_channel)(AVFilterContext *ctx, ChannelContext *cc, const uint8_t *srcp, int nb_samples)
Definition: af_speechnorm.c:89
ff_af_speechnorm
const AVFilter ff_af_speechnorm
Definition: af_speechnorm.c:594
FILTER_SAMPLEFMTS
#define FILTER_SAMPLEFMTS(...)
Definition: internal.h:170
ff_filter_set_ready
void ff_filter_set_ready(AVFilterContext *filter, unsigned priority)
Mark a filter ready and schedule it for activation.
Definition: avfilter.c:235
min
float min
Definition: vorbis_enc_data.h:429
SpeechNormalizerContext::prev_gain
double prev_gain
Definition: af_speechnorm.c:81