FFmpeg
af_speechnorm.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020 Paul B Mahol
3  *
4  * Speech Normalizer
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 /**
24  * @file
25  * Speech Normalizer
26  */
27 
#include <float.h>
#include <string.h>

#include "libavutil/avassert.h"
#include "libavutil/channel_layout.h"
#include "libavutil/frame.h"
#include "libavutil/opt.h"

#define FF_BUFQUEUE_SIZE (1024)
#include "bufferqueue.h"

#include "audio.h"
#include "avfilter.h"
#include "filters.h"
#include "internal.h"
41 
/* Capacity of each channel's ring of period items; item indices wrap to 0 on reaching it. */
#define MAX_ITEMS 882000
/* Minimum peak (1/32768) a finished half-period needs in order to be queued on its own. */
#define MIN_PEAK (1. / 32768.)
44 
/**
 * Statistics of one run of consecutive samples that share the same sign,
 * as collected by the analyze_channel_*() functions.
 */
typedef struct PeriodItem {
    int size;        /* number of samples accumulated into this item */
    int type;        /* 0 while the item is still being filled, 1 once analysis has closed it */
    double max_peak; /* largest absolute sample value seen in the item */
    double rms_sum;  /* sum of the squared sample values of the item */
} PeriodItem;
51 
52 typedef struct ChannelContext {
53  int state;
54  int bypass;
56  double gain_state;
57  double pi_max_peak;
58  double pi_rms_sum;
59  int pi_start;
60  int pi_end;
61  int pi_size;
63 
64 typedef struct SpeechNormalizerContext {
65  const AVClass *class;
66 
67  double rms_value;
68  double peak_value;
69  double max_expansion;
72  double raise_amount;
73  double fall_amount;
76  int invert;
77  int link;
78 
80  double prev_gain;
81 
83  int eof;
84  int64_t pts;
85 
86  struct FFBufQueue queue;
87 
89  const uint8_t *srcp, int nb_samples);
91  AVFrame *in, AVFrame *out, int nb_samples);
93 
/* Byte offset of an option field inside the private context. */
#define OFFSET(x) offsetof(SpeechNormalizerContext, x)
/* All options are audio filtering parameters that may also be changed at runtime. */
#define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM

/*
 * User options. Every option is listed twice: once under its long name and
 * once under a one-letter alias that targets the same context field.
 */
static const AVOption speechnorm_options[] = {
    { "peak", "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0.0, 1.0, FLAGS },
    { "p", "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0.0, 1.0, FLAGS },
    { "expansion", "set the max expansion factor", OFFSET(max_expansion), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
    { "e", "set the max expansion factor", OFFSET(max_expansion), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
    { "compression", "set the max compression factor", OFFSET(max_compression), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
    { "c", "set the max compression factor", OFFSET(max_compression), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
    { "threshold", "set the threshold value", OFFSET(threshold_value), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0.0, 1.0, FLAGS },
    { "t", "set the threshold value", OFFSET(threshold_value), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0.0, 1.0, FLAGS },
    { "raise", "set the expansion raising amount", OFFSET(raise_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
    { "r", "set the expansion raising amount", OFFSET(raise_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
    { "fall", "set the compression raising amount", OFFSET(fall_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
    { "f", "set the compression raising amount", OFFSET(fall_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
    { "channels", "set channels to filter", OFFSET(ch_layout_str), AV_OPT_TYPE_STRING, {.str="all"}, 0, 0, FLAGS },
    { "h", "set channels to filter", OFFSET(ch_layout_str), AV_OPT_TYPE_STRING, {.str="all"}, 0, 0, FLAGS },
    { "invert", "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
    { "i", "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
    { "link", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
    { "l", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
    { "rms", "set the RMS value", OFFSET(rms_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.0}, 0.0, 1.0, FLAGS },
    { "m", "set the RMS value", OFFSET(rms_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.0}, 0.0, 1.0, FLAGS },
    { NULL }
};

AVFILTER_DEFINE_CLASS(speechnorm);
122 
123 static int get_pi_samples(PeriodItem *pi, int start, int end, int remain)
124 {
125  int sum;
126 
127  if (pi[start].type == 0)
128  return remain;
129 
130  sum = remain;
131  while (start != end) {
132  start++;
133  if (start >= MAX_ITEMS)
134  start = 0;
135  if (pi[start].type == 0)
136  break;
137  av_assert1(pi[start].size > 0);
138  sum += pi[start].size;
139  }
140 
141  return sum;
142 }
143 
145 {
146  SpeechNormalizerContext *s = ctx->priv;
147  AVFilterLink *inlink = ctx->inputs[0];
148  int min_pi_nb_samples;
149 
150  min_pi_nb_samples = get_pi_samples(s->cc[0].pi, s->cc[0].pi_start, s->cc[0].pi_end, s->cc[0].pi_size);
151  for (int ch = 1; ch < inlink->ch_layout.nb_channels && min_pi_nb_samples > 0; ch++) {
152  ChannelContext *cc = &s->cc[ch];
153 
154  min_pi_nb_samples = FFMIN(min_pi_nb_samples, get_pi_samples(cc->pi, cc->pi_start, cc->pi_end, cc->pi_size));
155  }
156 
157  return min_pi_nb_samples;
158 }
159 
160 static void consume_pi(ChannelContext *cc, int nb_samples)
161 {
162  if (cc->pi_size >= nb_samples) {
163  cc->pi_size -= nb_samples;
164  } else {
165  av_assert1(0);
166  }
167 }
168 
169 static double next_gain(AVFilterContext *ctx, double pi_max_peak, int bypass, double state,
170  double pi_rms_sum, int pi_size)
171 {
172  SpeechNormalizerContext *s = ctx->priv;
173  const double compression = 1. / s->max_compression;
174  const int type = s->invert ? pi_max_peak <= s->threshold_value : pi_max_peak >= s->threshold_value;
175  double expansion = FFMIN(s->max_expansion, s->peak_value / pi_max_peak);
176 
177  if (s->rms_value > DBL_EPSILON)
178  expansion = FFMIN(expansion, s->rms_value / sqrt(pi_rms_sum / pi_size));
179 
180  if (bypass) {
181  return 1.;
182  } else if (type) {
183  return FFMIN(expansion, state + s->raise_amount);
184  } else {
185  return FFMIN(expansion, FFMAX(compression, state - s->fall_amount));
186  }
187 }
188 
189 static void next_pi(AVFilterContext *ctx, ChannelContext *cc, int bypass)
190 {
191  av_assert1(cc->pi_size >= 0);
192  if (cc->pi_size == 0) {
193  SpeechNormalizerContext *s = ctx->priv;
194  int start = cc->pi_start;
195 
196  av_assert1(cc->pi[start].size > 0);
197  av_assert0(cc->pi[start].type > 0 || s->eof);
198  cc->pi_size = cc->pi[start].size;
199  cc->pi_rms_sum = cc->pi[start].rms_sum;
200  cc->pi_max_peak = cc->pi[start].max_peak;
201  av_assert1(cc->pi_start != cc->pi_end || s->eof);
202  start++;
203  if (start >= MAX_ITEMS)
204  start = 0;
205  cc->pi_start = start;
206  cc->gain_state = next_gain(ctx, cc->pi_max_peak, bypass, cc->gain_state,
207  cc->pi_rms_sum, cc->pi_size);
208  }
209 }
210 
211 static double min_gain(AVFilterContext *ctx, ChannelContext *cc, int max_size)
212 {
213  SpeechNormalizerContext *s = ctx->priv;
214  double min_gain = s->max_expansion;
215  double gain_state = cc->gain_state;
216  int size = cc->pi_size;
217  int idx = cc->pi_start;
218 
219  min_gain = FFMIN(min_gain, gain_state);
220  while (size <= max_size) {
221  if (idx == cc->pi_end)
222  break;
223  gain_state = next_gain(ctx, cc->pi[idx].max_peak, 0, gain_state,
224  cc->pi[idx].rms_sum, cc->pi[idx].size);
225  min_gain = FFMIN(min_gain, gain_state);
226  size += cc->pi[idx].size;
227  idx++;
228  if (idx >= MAX_ITEMS)
229  idx = 0;
230  }
231 
232  return min_gain;
233 }
234 
/**
 * Generate analyze_channel_<name>(): split the samples of one channel into
 * period items (runs of samples with the same sign) and append them to the
 * channel's ring.
 *
 * @param name     suffix of the generated function
 * @param ptype    sample type
 * @param zero     zero literal of ptype; samples >= zero count as positive
 * @param min_peak smallest peak for which a finished run is closed on its own
 */
#define ANALYZE_CHANNEL(name, ptype, zero, min_peak) \
static void analyze_channel_## name (AVFilterContext *ctx, ChannelContext *cc, \
                                     const uint8_t *srcp, int nb_samples) \
{ \
    SpeechNormalizerContext *s = ctx->priv; \
    const ptype *src = (const ptype *)srcp; \
    const int max_period = s->max_period; \
    PeriodItem *pi = (PeriodItem *)&cc->pi; \
    int pi_end = cc->pi_end; \
    int n = 0; \
 \
    /* first call for this channel: take the sign of the first sample */ \
    if (cc->state < 0) \
        cc->state = src[0] >= zero; \
 \
    while (n < nb_samples) { \
        ptype new_max_peak; \
        ptype new_rms_sum; \
        int new_size; \
 \
        /* the sign flipped or the open item grew past max_period */ \
        if ((cc->state != (src[n] >= zero)) || \
            (pi[pi_end].size > max_period)) { \
            ptype max_peak = pi[pi_end].max_peak; \
            ptype rms_sum = pi[pi_end].rms_sum; \
            int state = cc->state; \
 \
            cc->state = src[n] >= zero; \
            av_assert1(pi[pi_end].size > 0); \
            /* items quieter than min_peak stay open and absorb the next run */ \
            if (max_peak >= min_peak || \
                pi[pi_end].size > max_period) { \
                pi[pi_end].type = 1; \
                pi_end++; \
                if (pi_end >= MAX_ITEMS) \
                    pi_end = 0; \
                if (cc->state != state) { \
                    pi[pi_end].max_peak = DBL_MIN; \
                    pi[pi_end].rms_sum = 0.0; \
                } else { \
                    /* split only because of the length: carry the statistics over */ \
                    pi[pi_end].max_peak = max_peak; \
                    pi[pi_end].rms_sum = rms_sum; \
                } \
                pi[pi_end].type = 0; \
                pi[pi_end].size = 0; \
                av_assert1(pi_end != cc->pi_start); \
            } \
        } \
 \
        /* accumulate the run of same-signed samples into the open item */ \
        new_max_peak = pi[pi_end].max_peak; \
        new_rms_sum = pi[pi_end].rms_sum; \
        new_size = pi[pi_end].size; \
        if (cc->state) { \
            while (src[n] >= zero) { \
                new_max_peak = FFMAX(new_max_peak, src[n]); \
                new_rms_sum += src[n] * src[n]; \
                new_size++; \
                n++; \
                if (n >= nb_samples) \
                    break; \
            } \
        } else { \
            while (src[n] < zero) { \
                new_max_peak = FFMAX(new_max_peak, -src[n]); \
                new_rms_sum += src[n] * src[n]; \
                new_size++; \
                n++; \
                if (n >= nb_samples) \
                    break; \
            } \
        } \
 \
        pi[pi_end].max_peak = new_max_peak; \
        pi[pi_end].rms_sum = new_rms_sum; \
        pi[pi_end].size = new_size; \
    } \
    cc->pi_end = pi_end; \
}

ANALYZE_CHANNEL(dbl, double, 0.0, MIN_PEAK)
ANALYZE_CHANNEL(flt, float, 0.f, (float)MIN_PEAK)
313 
/**
 * Generate filter_channels_<name>(): apply the per-period gain to every
 * channel independently.
 *
 * Channels missing from s->ch_layout are processed with bypass set, which
 * makes next_gain() return a gain of 1. While ctx->is_disabled is set the
 * period items are still consumed but nothing is written to dst.
 *
 * @param name  suffix of the generated function
 * @param ptype sample type
 */
#define FILTER_CHANNELS(name, ptype) \
static void filter_channels_## name (AVFilterContext *ctx, \
                                     AVFrame *in, AVFrame *out, int nb_samples) \
{ \
    SpeechNormalizerContext *s = ctx->priv; \
    AVFilterLink *inlink = ctx->inputs[0]; \
 \
    for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { \
        ChannelContext *cc = &s->cc[ch]; \
        const ptype *src = (const ptype *)in->extended_data[ch]; \
        ptype *dst = (ptype *)out->extended_data[ch]; \
        enum AVChannel channel = av_channel_layout_channel_from_index(&inlink->ch_layout, ch); \
        const int bypass = av_channel_layout_index_from_channel(&s->ch_layout, channel) < 0; \
        int n = 0; \
 \
        while (n < nb_samples) { \
            ptype gain; \
            int size; \
 \
            next_pi(ctx, cc, bypass); \
            /* handle at most what is left of the current period item */ \
            size = FFMIN(nb_samples - n, cc->pi_size); \
            av_assert1(size > 0); \
            gain = cc->gain_state; \
            consume_pi(cc, size); \
            for (int i = n; !ctx->is_disabled && i < n + size; i++) \
                dst[i] = src[i] * gain; \
            n += size; \
        } \
    } \
}

FILTER_CHANNELS(dbl, double)
FILTER_CHANNELS(flt, float)
347 
/**
 * Linear interpolation in double precision.
 *
 * @return min for mix == 0, max for mix == 1, and the proportional value
 *         on the line through both for any other mix
 */
static double dlerp(double min, double max, double mix)
{
    return (max - min) * mix + min;
}
352 
/**
 * Linear interpolation in single precision.
 *
 * @return min for mix == 0, max for mix == 1, and the proportional value
 *         on the line through both for any other mix
 */
static float flerp(float min, float max, float mix)
{
    return (max - min) * mix + min;
}
357 
/**
 * Generate filter_link_channels_<name>(): filter all channels with one
 * shared gain.
 *
 * The frame is processed in segments no longer than the shortest remaining
 * period item of any channel. For each segment the shared gain is the
 * smallest min_gain() of the non-bypassed channels, and the applied gain is
 * interpolated from s->prev_gain to that value across the segment.
 * Bypassed channels consume their period items but are not written to dst;
 * the same holds for every channel while ctx->is_disabled is set.
 *
 * @param name  suffix of the generated function
 * @param ptype sample type
 * @param tlerp linear interpolation function for ptype
 */
#define FILTER_LINK_CHANNELS(name, ptype, tlerp) \
static void filter_link_channels_## name (AVFilterContext *ctx, \
                                          AVFrame *in, AVFrame *out, \
                                          int nb_samples) \
{ \
    SpeechNormalizerContext *s = ctx->priv; \
    AVFilterLink *inlink = ctx->inputs[0]; \
    int n = 0; \
 \
    while (n < nb_samples) { \
        int min_size = nb_samples - n; \
        ptype gain = s->max_expansion; \
 \
        /* pass 1: load period items and find the segment length */ \
        for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { \
            ChannelContext *cc = &s->cc[ch]; \
 \
            enum AVChannel channel = av_channel_layout_channel_from_index(&inlink->ch_layout, ch); \
            cc->bypass = av_channel_layout_index_from_channel(&s->ch_layout, channel) < 0; \
 \
            next_pi(ctx, cc, cc->bypass); \
            min_size = FFMIN(min_size, cc->pi_size); \
        } \
 \
        av_assert1(min_size > 0); \
        /* pass 2: the shared gain is the lowest one wanted by any channel */ \
        for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { \
            ChannelContext *cc = &s->cc[ch]; \
 \
            if (cc->bypass) \
                continue; \
            gain = FFMIN(gain, min_gain(ctx, cc, min_size)); \
        } \
 \
        /* pass 3: consume the segment and apply the interpolated gain */ \
        for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { \
            ChannelContext *cc = &s->cc[ch]; \
            const ptype *src = (const ptype *)in->extended_data[ch]; \
            ptype *dst = (ptype *)out->extended_data[ch]; \
 \
            consume_pi(cc, min_size); \
            if (cc->bypass) \
                continue; \
 \
            for (int i = n; !ctx->is_disabled && i < n + min_size; i++) { \
                ptype g = tlerp(s->prev_gain, gain, (i - n) / (ptype)min_size); \
                dst[i] = src[i] * g; \
            } \
        } \
 \
        s->prev_gain = gain; \
        n += min_size; \
    } \
}

FILTER_LINK_CHANNELS(dbl, double, dlerp)
FILTER_LINK_CHANNELS(flt, float, flerp)
412 
414 {
415  SpeechNormalizerContext *s = ctx->priv;
416  AVFilterLink *outlink = ctx->outputs[0];
417  AVFilterLink *inlink = ctx->inputs[0];
418  int ret;
419 
420  while (s->queue.available > 0) {
421  int min_pi_nb_samples;
422  AVFrame *in, *out;
423 
424  in = ff_bufqueue_peek(&s->queue, 0);
425  if (!in)
426  break;
427 
428  min_pi_nb_samples = available_samples(ctx);
429  if (min_pi_nb_samples < in->nb_samples && !s->eof)
430  break;
431 
432  in = ff_bufqueue_get(&s->queue);
433 
434  if (av_frame_is_writable(in)) {
435  out = in;
436  } else {
437  out = ff_get_audio_buffer(outlink, in->nb_samples);
438  if (!out) {
439  av_frame_free(&in);
440  return AVERROR(ENOMEM);
441  }
443  }
444 
445  s->filter_channels[s->link](ctx, in, out, in->nb_samples);
446 
447  s->pts = in->pts + av_rescale_q(in->nb_samples, av_make_q(1, outlink->sample_rate),
448  outlink->time_base);
449 
450  if (out != in)
451  av_frame_free(&in);
452  return ff_filter_frame(outlink, out);
453  }
454 
455  for (int f = 0; f < ff_inlink_queued_frames(inlink); f++) {
456  AVFrame *in;
457 
459  if (ret < 0)
460  return ret;
461  if (ret == 0)
462  break;
463 
464  ff_bufqueue_add(ctx, &s->queue, in);
465 
466  for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
467  ChannelContext *cc = &s->cc[ch];
468 
469  s->analyze_channel(ctx, cc, in->extended_data[ch], in->nb_samples);
470  }
471  }
472 
473  return 1;
474 }
475 
477 {
478  AVFilterLink *inlink = ctx->inputs[0];
479  AVFilterLink *outlink = ctx->outputs[0];
480  SpeechNormalizerContext *s = ctx->priv;
481  int ret, status;
482  int64_t pts;
483 
484  ret = av_channel_layout_copy(&s->ch_layout, &inlink->ch_layout);
485  if (ret < 0)
486  return ret;
487  if (strcmp(s->ch_layout_str, "all"))
488  av_channel_layout_from_string(&s->ch_layout,
489  s->ch_layout_str);
490 
492 
493  ret = filter_frame(ctx);
494  if (ret <= 0)
495  return ret;
496 
497  if (!s->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {
498  if (status == AVERROR_EOF)
499  s->eof = 1;
500  }
501 
502  if (s->eof && ff_inlink_queued_samples(inlink) == 0 &&
503  s->queue.available == 0) {
504  ff_outlink_set_status(outlink, AVERROR_EOF, s->pts);
505  return 0;
506  }
507 
508  if (s->queue.available > 0) {
509  AVFrame *in = ff_bufqueue_peek(&s->queue, 0);
510  const int nb_samples = available_samples(ctx);
511 
512  if (nb_samples >= in->nb_samples || s->eof) {
514  return 0;
515  }
516  }
517 
519 
520  return FFERROR_NOT_READY;
521 }
522 
524 {
525  AVFilterContext *ctx = inlink->dst;
526  SpeechNormalizerContext *s = ctx->priv;
527 
528  s->max_period = inlink->sample_rate / 10;
529 
530  s->prev_gain = 1.;
531  s->cc = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->cc));
532  if (!s->cc)
533  return AVERROR(ENOMEM);
534 
535  for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
536  ChannelContext *cc = &s->cc[ch];
537 
538  cc->state = -1;
539  cc->gain_state = s->max_expansion;
540  }
541 
542  switch (inlink->format) {
543  case AV_SAMPLE_FMT_FLTP:
544  s->analyze_channel = analyze_channel_flt;
545  s->filter_channels[0] = filter_channels_flt;
546  s->filter_channels[1] = filter_link_channels_flt;
547  break;
548  case AV_SAMPLE_FMT_DBLP:
549  s->analyze_channel = analyze_channel_dbl;
550  s->filter_channels[0] = filter_channels_dbl;
551  s->filter_channels[1] = filter_link_channels_dbl;
552  break;
553  default:
554  av_assert1(0);
555  }
556 
557  return 0;
558 }
559 
560 static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
561  char *res, int res_len, int flags)
562 {
563  SpeechNormalizerContext *s = ctx->priv;
564  int link = s->link;
565  int ret;
566 
567  ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags);
568  if (ret < 0)
569  return ret;
570  if (link != s->link)
571  s->prev_gain = 1.;
572 
573  return 0;
574 }
575 
577 {
578  SpeechNormalizerContext *s = ctx->priv;
579 
580  ff_bufqueue_discard_all(&s->queue);
581  av_channel_layout_uninit(&s->ch_layout);
582  av_freep(&s->cc);
583 }
584 
/* Single audio input pad; config_input() runs when the link is configured. */
static const AVFilterPad inputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_AUDIO,
        .config_props = config_input,
    },
};
592 
/* Single audio output pad. */
static const AVFilterPad outputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_AUDIO,
    },
};
599 
601  .name = "speechnorm",
602  .description = NULL_IF_CONFIG_SMALL("Speech Normalizer."),
603  .priv_size = sizeof(SpeechNormalizerContext),
604  .priv_class = &speechnorm_class,
605  .activate = activate,
606  .uninit = uninit,
611  .process_command = process_command,
612 };
inputs
static const AVFilterPad inputs[]
Definition: af_speechnorm.c:585
ff_get_audio_buffer
AVFrame * ff_get_audio_buffer(AVFilterLink *link, int nb_samples)
Request an audio samples buffer with a specific set of permissions.
Definition: audio.c:100
AV_SAMPLE_FMT_FLTP
@ AV_SAMPLE_FMT_FLTP
float, planar
Definition: samplefmt.h:66
get_pi_samples
static int get_pi_samples(PeriodItem *pi, int start, int end, int remain)
Definition: af_speechnorm.c:123
status
they must not be accessed directly The fifo field contains the frames that are queued in the input for processing by the filter The status_in and status_out fields contains the queued status(EOF or error) of the link
mix
static int mix(int c0, int c1)
Definition: 4xm.c:717
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
out
FILE * out
Definition: movenc.c:54
consume_pi
static void consume_pi(ChannelContext *cc, int nb_samples)
Definition: af_speechnorm.c:160
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:969
PeriodItem::type
int type
Definition: af_speechnorm.c:47
AVERROR_EOF
#define AVERROR_EOF
End of file.
Definition: error.h:57
FFERROR_NOT_READY
return FFERROR_NOT_READY
Definition: filter_design.txt:204
process_command
static int process_command(AVFilterContext *ctx, const char *cmd, const char *args, char *res, int res_len, int flags)
Definition: af_speechnorm.c:560
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:99
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:330
SpeechNormalizerContext::ch_layout
AVChannelLayout ch_layout
Definition: af_speechnorm.c:75
AVOption
AVOption.
Definition: opt.h:251
PeriodItem
Definition: af_speechnorm.c:45
SpeechNormalizerContext::peak_value
double peak_value
Definition: af_speechnorm.c:68
float.h
SpeechNormalizerContext::filter_channels
void(* filter_channels[2])(AVFilterContext *ctx, AVFrame *in, AVFrame *out, int nb_samples)
Definition: af_speechnorm.c:90
max
#define max(a, b)
Definition: cuda_runtime.h:33
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:165
ChannelContext::gain_state
double gain_state
Definition: af_speechnorm.c:56
next_gain
static double next_gain(AVFilterContext *ctx, double pi_max_peak, int bypass, double state, double pi_rms_sum, int pi_size)
Definition: af_speechnorm.c:169
FF_FILTER_FORWARD_STATUS_BACK
#define FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink)
Forward the status on an output link to an input link.
Definition: filters.h:199
SpeechNormalizerContext::link
int link
Definition: af_speechnorm.c:77
MAX_ITEMS
#define MAX_ITEMS
Definition: af_speechnorm.c:42
ff_bufqueue_get
static AVFrame * ff_bufqueue_get(struct FFBufQueue *queue)
Get the first buffer from the queue and remove it.
Definition: bufferqueue.h:98
SpeechNormalizerContext::threshold_value
double threshold_value
Definition: af_speechnorm.c:71
ff_inlink_consume_frame
int ff_inlink_consume_frame(AVFilterLink *link, AVFrame **rframe)
Take a frame from the link's FIFO and update the link's stats.
Definition: avfilter.c:1364
dlerp
static double dlerp(double min, double max, double mix)
Definition: af_speechnorm.c:348
type
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf type
Definition: writing_filters.txt:86
pts
static int64_t pts
Definition: transcode_aac.c:653
AVFILTER_DEFINE_CLASS
AVFILTER_DEFINE_CLASS(speechnorm)
AVFilterPad
A filter pad used for either input or output.
Definition: internal.h:49
SpeechNormalizerContext::rms_value
double rms_value
Definition: af_speechnorm.c:67
avassert.h
av_cold
#define av_cold
Definition: attributes.h:90
SpeechNormalizerContext::eof
int eof
Definition: af_speechnorm.c:83
ff_outlink_set_status
static void ff_outlink_set_status(AVFilterLink *link, int status, int64_t pts)
Set the status field of a link from the source filter.
Definition: filters.h:189
ChannelContext::pi_rms_sum
double pi_rms_sum
Definition: af_speechnorm.c:58
s
#define s(width, name)
Definition: cbs_vp9.c:256
available_samples
static int available_samples(AVFilterContext *ctx)
Definition: af_speechnorm.c:144
SpeechNormalizerContext::raise_amount
double raise_amount
Definition: af_speechnorm.c:72
AV_OPT_TYPE_DOUBLE
@ AV_OPT_TYPE_DOUBLE
Definition: opt.h:227
AVMEDIA_TYPE_AUDIO
@ AVMEDIA_TYPE_AUDIO
Definition: avutil.h:202
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
filters.h
ctx
AVFormatContext * ctx
Definition: movenc.c:48
av_rescale_q
int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq)
Rescale a 64-bit integer by 2 rational numbers.
Definition: mathematics.c:142
SpeechNormalizerContext::cc
ChannelContext * cc
Definition: af_speechnorm.c:79
outputs
static const AVFilterPad outputs[]
Definition: af_speechnorm.c:593
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: internal.h:194
link
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a link
Definition: filter_design.txt:23
min_gain
static double min_gain(AVFilterContext *ctx, ChannelContext *cc, int max_size)
Definition: af_speechnorm.c:211
AVClass
Describe the class of an AVClass context structure.
Definition: log.h:66
NULL
#define NULL
Definition: coverity.c:32
av_frame_copy_props
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:594
ff_bufqueue_discard_all
static void ff_bufqueue_discard_all(struct FFBufQueue *queue)
Unref and remove all buffers from the queue.
Definition: bufferqueue.h:111
ChannelContext::pi_max_peak
double pi_max_peak
Definition: af_speechnorm.c:57
PeriodItem::size
int size
Definition: af_speechnorm.c:46
ChannelContext::pi_end
int pi_end
Definition: af_speechnorm.c:60
ff_inlink_acknowledge_status
int ff_inlink_acknowledge_status(AVFilterLink *link, int *rstatus, int64_t *rpts)
Test and acknowledge the change of status on the link.
Definition: avfilter.c:1318
ff_inlink_queued_frames
size_t ff_inlink_queued_frames(AVFilterLink *link)
Get the number of frames available on the link.
Definition: avfilter.c:1333
bufferqueue.h
f
f
Definition: af_crystalizer.c:122
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:115
AVChannelLayout
An AVChannelLayout holds information about the channel layout of audio data.
Definition: channel_layout.h:301
size
int size
Definition: twinvq_data.h:10344
av_make_q
static AVRational av_make_q(int num, int den)
Create an AVRational.
Definition: rational.h:71
av_frame_is_writable
int av_frame_is_writable(AVFrame *frame)
Check if the frame data is writable.
Definition: frame.c:524
SpeechNormalizerContext::max_period
int max_period
Definition: af_speechnorm.c:82
ff_filter_process_command
int ff_filter_process_command(AVFilterContext *ctx, const char *cmd, const char *arg, char *res, int res_len, int flags)
Generic processing of user supplied commands that are set in the same way as the filter options.
Definition: avfilter.c:842
FF_FILTER_FORWARD_WANTED
FF_FILTER_FORWARD_WANTED(outlink, inlink)
ff_bufqueue_add
static void ff_bufqueue_add(void *log, struct FFBufQueue *queue, AVFrame *buf)
Add a buffer to the queue.
Definition: bufferqueue.h:71
FILTER_CHANNELS
#define FILTER_CHANNELS(name, ptype)
Definition: af_speechnorm.c:314
SpeechNormalizerContext
Definition: af_speechnorm.c:64
internal.h
av_channel_layout_from_string
int av_channel_layout_from_string(AVChannelLayout *channel_layout, const char *str)
Initialize a channel layout from a given string description.
Definition: channel_layout.c:404
AVFrame::nb_samples
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:410
ff_bufqueue_peek
static AVFrame * ff_bufqueue_peek(struct FFBufQueue *queue, unsigned index)
Get a buffer from the queue without altering it.
Definition: bufferqueue.h:87
FFBufQueue
Structure holding the queue.
Definition: bufferqueue.h:49
invert
static void invert(float *h, int n)
Definition: asrc_sinc.c:198
AVFrame::extended_data
uint8_t ** extended_data
pointers to the data planes/channels.
Definition: frame.h:391
av_assert1
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
Definition: avassert.h:53
FLAGS
#define FLAGS
Definition: af_speechnorm.c:95
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
SpeechNormalizerContext::invert
int invert
Definition: af_speechnorm.c:76
SpeechNormalizerContext::queue
struct FFBufQueue queue
Definition: af_speechnorm.c:86
AVFilterPad::name
const char * name
Pad name.
Definition: internal.h:55
ff_inlink_queued_samples
int ff_inlink_queued_samples(AVFilterLink *link)
Definition: avfilter.c:1343
ANALYZE_CHANNEL
#define ANALYZE_CHANNEL(name, ptype, zero, min_peak)
Definition: af_speechnorm.c:235
ChannelContext::pi_size
int pi_size
Definition: af_speechnorm.c:61
av_calloc
void * av_calloc(size_t nmemb, size_t size)
Definition: mem.c:262
SpeechNormalizerContext::fall_amount
double fall_amount
Definition: af_speechnorm.c:73
activate
static int activate(AVFilterContext *ctx)
Definition: af_speechnorm.c:476
AVFilter
Filter definition.
Definition: avfilter.h:161
ret
ret
Definition: filter_design.txt:187
PeriodItem::max_peak
double max_peak
Definition: af_speechnorm.c:48
MIN_PEAK
#define MIN_PEAK
Definition: af_speechnorm.c:43
ChannelContext::bypass
int bypass
Definition: af_speechnorm.c:54
uninit
static av_cold void uninit(AVFilterContext *ctx)
Definition: af_speechnorm.c:576
config_input
static int config_input(AVFilterLink *inlink)
Definition: af_speechnorm.c:523
channel_layout.h
next_pi
static void next_pi(AVFilterContext *ctx, ChannelContext *cc, int bypass)
Definition: af_speechnorm.c:189
avfilter.h
av_channel_layout_uninit
void av_channel_layout_uninit(AVChannelLayout *channel_layout)
Free any allocated data in the channel layout and reset the channel count to 0.
Definition: channel_layout.c:632
ChannelContext::pi_start
int pi_start
Definition: af_speechnorm.c:59
AV_SAMPLE_FMT_DBLP
@ AV_SAMPLE_FMT_DBLP
double, planar
Definition: samplefmt.h:67
ChannelContext
Definition: hcadec.c:31
ChannelContext::state
int state
Definition: af_speechnorm.c:53
SpeechNormalizerContext::max_compression
double max_compression
Definition: af_speechnorm.c:70
ChannelContext::pi
PeriodItem pi[MAX_ITEMS]
Definition: af_speechnorm.c:55
AVFilterContext
An instance of a filter.
Definition: avfilter.h:392
av_channel_layout_copy
int av_channel_layout_copy(AVChannelLayout *dst, const AVChannelLayout *src)
Make a copy of a channel layout.
Definition: channel_layout.c:639
filter_frame
static int filter_frame(AVFilterContext *ctx)
Definition: af_speechnorm.c:413
FILTER_LINK_CHANNELS
#define FILTER_LINK_CHANNELS(name, ptype, tlerp)
Definition: af_speechnorm.c:358
flerp
static float flerp(float min, float max, float mix)
Definition: af_speechnorm.c:353
audio.h
PeriodItem::rms_sum
double rms_sum
Definition: af_speechnorm.c:49
AV_OPT_TYPE_BOOL
@ AV_OPT_TYPE_BOOL
Definition: opt.h:244
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: internal.h:195
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
SpeechNormalizerContext::ch_layout_str
char * ch_layout_str
Definition: af_speechnorm.c:74
AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL
#define AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL
Same as AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, except that the filter will have its filter_frame() c...
Definition: avfilter.h:150
OFFSET
#define OFFSET(x)
Definition: af_speechnorm.c:94
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:561
AV_OPT_TYPE_STRING
@ AV_OPT_TYPE_STRING
Definition: opt.h:229
state
static struct @345 state
SpeechNormalizerContext::pts
int64_t pts
Definition: af_speechnorm.c:84
SpeechNormalizerContext::max_expansion
double max_expansion
Definition: af_speechnorm.c:69
speechnorm_options
static const AVOption speechnorm_options[]
Definition: af_speechnorm.c:97
SpeechNormalizerContext::analyze_channel
void(* analyze_channel)(AVFilterContext *ctx, ChannelContext *cc, const uint8_t *srcp, int nb_samples)
Definition: af_speechnorm.c:88
ff_af_speechnorm
const AVFilter ff_af_speechnorm
Definition: af_speechnorm.c:600
FILTER_SAMPLEFMTS
#define FILTER_SAMPLEFMTS(...)
Definition: internal.h:182
ff_filter_set_ready
void ff_filter_set_ready(AVFilterContext *filter, unsigned priority)
Mark a filter ready and schedule it for activation.
Definition: avfilter.c:204
min
float min
Definition: vorbis_enc_data.h:429
SpeechNormalizerContext::prev_gain
double prev_gain
Definition: af_speechnorm.c:80