FFmpeg
af_speechnorm.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020 Paul B Mahol
3  *
4  * Speech Normalizer
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 /**
24  * @file
25  * Speech Normalizer
26  */
27 
28 #include <float.h>
29 
30 #include "libavutil/avassert.h"
32 #include "libavutil/opt.h"
33 
34 #define FF_BUFQUEUE_SIZE (1024)
35 #include "bufferqueue.h"
36 
37 #include "audio.h"
38 #include "avfilter.h"
39 #include "filters.h"
40 #include "internal.h"
41 
42 #define MAX_ITEMS 882000
43 #define MIN_PEAK (1. / 32768.)
44 
/* One half-wave period of the input: a run of consecutive samples that all
 * lie on the same side of zero (or a run force-split once it exceeds
 * max_period samples). Stored in a per-channel ring buffer. */
typedef struct PeriodItem {
    int size;        /* number of samples in this period                    */
    int type;        /* 0: still being accumulated, 1: complete, usable     */
    double max_peak; /* largest absolute sample value seen in the period    */
} PeriodItem;
50 
51 typedef struct ChannelContext {
52  int state;
53  int bypass;
55  double gain_state;
56  double pi_max_peak;
57  int pi_start;
58  int pi_end;
59  int pi_size;
61 
62 typedef struct SpeechNormalizerContext {
63  const AVClass *class;
64 
65  double peak_value;
66  double max_expansion;
69  double raise_amount;
70  double fall_amount;
71  uint64_t channels;
72  int invert;
73  int link;
74 
76  double prev_gain;
77 
79  int eof;
80  int64_t pts;
81 
82  struct FFBufQueue queue;
83 
85  const uint8_t *srcp, int nb_samples);
87  AVFrame *in, int nb_samples);
89 
90 #define OFFSET(x) offsetof(SpeechNormalizerContext, x)
91 #define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
92 
/* Filter options; every option carries AV_OPT_FLAG_RUNTIME_PARAM via FLAGS,
 * so all of them may be changed at runtime through process_command(). */
static const AVOption speechnorm_options[] = {
    { "peak", "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0.0, 1.0, FLAGS },
    { "p", "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0.0, 1.0, FLAGS },
    { "expansion", "set the max expansion factor", OFFSET(max_expansion), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
    { "e", "set the max expansion factor", OFFSET(max_expansion), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
    { "compression", "set the max compression factor", OFFSET(max_compression), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
    { "c", "set the max compression factor", OFFSET(max_compression), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
    { "threshold", "set the threshold value", OFFSET(threshold_value), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0.0, 1.0, FLAGS },
    { "t", "set the threshold value", OFFSET(threshold_value), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0.0, 1.0, FLAGS },
    { "raise", "set the expansion raising amount", OFFSET(raise_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
    { "r", "set the expansion raising amount", OFFSET(raise_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
    { "fall", "set the compression raising amount", OFFSET(fall_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
    { "f", "set the compression raising amount", OFFSET(fall_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
    { "channels", "set channels to filter", OFFSET(channels), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=-1}, INT64_MIN, INT64_MAX, FLAGS },
    { "h", "set channels to filter", OFFSET(channels), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=-1}, INT64_MIN, INT64_MAX, FLAGS },
    { "invert", "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
    { "i", "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
    { "link", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
    { "l", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
    { NULL }
};
114 
115 AVFILTER_DEFINE_CLASS(speechnorm);
116 
118 {
121  static const enum AVSampleFormat sample_fmts[] = {
124  };
125  int ret;
126 
128  if (!layouts)
129  return AVERROR(ENOMEM);
131  if (ret < 0)
132  return ret;
133 
135  if (!formats)
136  return AVERROR(ENOMEM);
138  if (ret < 0)
139  return ret;
140 
142  if (!formats)
143  return AVERROR(ENOMEM);
145 }
146 
147 static int get_pi_samples(PeriodItem *pi, int start, int end, int remain)
148 {
149  int sum;
150 
151  if (pi[start].type == 0)
152  return remain;
153 
154  sum = remain;
155  while (start != end) {
156  start++;
157  if (start >= MAX_ITEMS)
158  start = 0;
159  if (pi[start].type == 0)
160  break;
161  av_assert0(pi[start].size > 0);
162  sum += pi[start].size;
163  }
164 
165  return sum;
166 }
167 
169 {
170  SpeechNormalizerContext *s = ctx->priv;
171  AVFilterLink *inlink = ctx->inputs[0];
172  int min_pi_nb_samples;
173 
174  min_pi_nb_samples = get_pi_samples(s->cc[0].pi, s->cc[0].pi_start, s->cc[0].pi_end, s->cc[0].pi_size);
175  for (int ch = 1; ch < inlink->channels && min_pi_nb_samples > 0; ch++) {
176  ChannelContext *cc = &s->cc[ch];
177 
178  min_pi_nb_samples = FFMIN(min_pi_nb_samples, get_pi_samples(cc->pi, cc->pi_start, cc->pi_end, cc->pi_size));
179  }
180 
181  return min_pi_nb_samples;
182 }
183 
184 static void consume_pi(ChannelContext *cc, int nb_samples)
185 {
186  if (cc->pi_size >= nb_samples) {
187  cc->pi_size -= nb_samples;
188  } else {
189  av_assert0(0);
190  }
191 }
192 
193 static double next_gain(AVFilterContext *ctx, double pi_max_peak, int bypass, double state)
194 {
195  SpeechNormalizerContext *s = ctx->priv;
196  const double expansion = FFMIN(s->max_expansion, s->peak_value / pi_max_peak);
197  const double compression = 1. / s->max_compression;
198  const int type = s->invert ? pi_max_peak <= s->threshold_value : pi_max_peak >= s->threshold_value;
199 
200  if (bypass) {
201  return 1.;
202  } else if (type) {
203  return FFMIN(expansion, state + s->raise_amount);
204  } else {
205  return FFMIN(expansion, FFMAX(compression, state - s->fall_amount));
206  }
207 }
208 
/* When the current period item is exhausted, pop the next one from the
 * channel's ring buffer and update the channel's gain state for it. */
static void next_pi(AVFilterContext *ctx, ChannelContext *cc, int bypass)
{
    av_assert0(cc->pi_size >= 0);
    if (cc->pi_size == 0) {
        SpeechNormalizerContext *s = ctx->priv;
        int start = cc->pi_start;

        /* The item being loaded must be non-empty and complete,
         * except while draining after EOF. */
        av_assert0(cc->pi[start].size > 0);
        av_assert0(cc->pi[start].type > 0 || s->eof);
        cc->pi_size = cc->pi[start].size;
        cc->pi_max_peak = cc->pi[start].max_peak;
        av_assert0(cc->pi_start != cc->pi_end || s->eof);
        start++;
        if (start >= MAX_ITEMS)
            start = 0; /* ring-buffer wrap */
        cc->pi_start = start;
        /* Smoothly step the channel gain towards the new period's target. */
        cc->gain_state = next_gain(ctx, cc->pi_max_peak, bypass, cc->gain_state);
    }
}
228 
/* Look ahead over the channel's queued period items covering up to
 * @max_size samples and return the smallest gain the channel would
 * reach, starting from its current gain state.  The lookahead only
 * simulates next_gain(); nothing is consumed.  Used by the linked
 * filtering mode so all channels can share the most conservative gain. */
static double min_gain(AVFilterContext *ctx, ChannelContext *cc, int max_size)
{
    SpeechNormalizerContext *s = ctx->priv;
    double min_gain = s->max_expansion;
    double gain_state = cc->gain_state;
    int size = cc->pi_size;
    int idx = cc->pi_start;

    min_gain = FFMIN(min_gain, gain_state);
    while (size <= max_size) {
        if (idx == cc->pi_end)
            break;
        /* Simulate the gain update for the next period without consuming it. */
        gain_state = next_gain(ctx, cc->pi[idx].max_peak, 0, gain_state);
        min_gain = FFMIN(min_gain, gain_state);
        size += cc->pi[idx].size;
        idx++;
        if (idx >= MAX_ITEMS)
            idx = 0; /* ring-buffer wrap */
    }

    return min_gain;
}
251 
/* Generate the per-sample-format analysis function.  The input is split
 * into half-wave periods (runs of samples with the same sign relative to
 * zero); each period's length and absolute peak are recorded in the
 * channel's PeriodItem ring buffer.  A period is also force-closed once
 * it grows past s->max_period samples, so silence/DC cannot stall the
 * pipeline.  Periods whose peak stays below MIN_PEAK are merged into the
 * next period rather than emitted. */
#define ANALYZE_CHANNEL(name, ptype, zero)                                    \
static void analyze_channel_## name (AVFilterContext *ctx, ChannelContext *cc, \
                                     const uint8_t *srcp, int nb_samples)     \
{                                                                             \
    SpeechNormalizerContext *s = ctx->priv;                                   \
    const ptype *src = (const ptype *)srcp;                                   \
    int n = 0;                                                                \
                                                                              \
    /* Latch the initial sign on the very first sample ever seen. */          \
    if (cc->state < 0)                                                        \
        cc->state = src[0] >= zero;                                           \
                                                                              \
    while (n < nb_samples) {                                                  \
        /* Close the current period on a sign change or when it is too long. */ \
        if ((cc->state != (src[n] >= zero)) ||                                \
            (cc->pi[cc->pi_end].size > s->max_period)) {                      \
            double max_peak = cc->pi[cc->pi_end].max_peak;                    \
            int state = cc->state;                                            \
            cc->state = src[n] >= zero;                                       \
            av_assert0(cc->pi[cc->pi_end].size > 0);                          \
            if (cc->pi[cc->pi_end].max_peak >= MIN_PEAK ||                    \
                cc->pi[cc->pi_end].size > s->max_period) {                    \
                cc->pi[cc->pi_end].type = 1;                                  \
                cc->pi_end++;                                                 \
                if (cc->pi_end >= MAX_ITEMS)                                  \
                    cc->pi_end = 0;                                           \
                /* Carry the peak over when the split was only size-forced. */ \
                if (cc->state != state)                                       \
                    cc->pi[cc->pi_end].max_peak = DBL_MIN;                    \
                else                                                          \
                    cc->pi[cc->pi_end].max_peak = max_peak;                   \
                cc->pi[cc->pi_end].type = 0;                                  \
                cc->pi[cc->pi_end].size = 0;                                  \
                av_assert0(cc->pi_end != cc->pi_start);                       \
            }                                                                 \
        }                                                                     \
                                                                              \
        /* Accumulate samples of the current sign into the open period. */    \
        if (cc->state) {                                                      \
            while (src[n] >= zero) {                                          \
                cc->pi[cc->pi_end].max_peak = FFMAX(cc->pi[cc->pi_end].max_peak, src[n]); \
                cc->pi[cc->pi_end].size++;                                    \
                n++;                                                          \
                if (n >= nb_samples)                                          \
                    break;                                                    \
            }                                                                 \
        } else {                                                              \
            while (src[n] < zero) {                                           \
                cc->pi[cc->pi_end].max_peak = FFMAX(cc->pi[cc->pi_end].max_peak, -src[n]); \
                cc->pi[cc->pi_end].size++;                                    \
                n++;                                                          \
                if (n >= nb_samples)                                          \
                    break;                                                    \
            }                                                                 \
        }                                                                     \
    }                                                                         \
}

ANALYZE_CHANNEL(dbl, double, 0.0)
ANALYZE_CHANNEL(flt, float, 0.f)
308 
/* Generate the unlinked (per-channel) filtering function: every channel
 * is scaled independently, applying one constant gain per period item.
 * Channels excluded by the "channels" option are bypassed (unity gain). */
#define FILTER_CHANNELS(name, ptype)                                        \
static void filter_channels_## name (AVFilterContext *ctx,                  \
                                     AVFrame *in, int nb_samples)           \
{                                                                           \
    SpeechNormalizerContext *s = ctx->priv;                                 \
    AVFilterLink *inlink = ctx->inputs[0];                                  \
                                                                            \
    for (int ch = 0; ch < inlink->channels; ch++) {                         \
        ChannelContext *cc = &s->cc[ch];                                    \
        ptype *dst = (ptype *)in->extended_data[ch];                        \
        const int bypass = !(av_channel_layout_extract_channel(inlink->channel_layout, ch) & s->channels); \
        int n = 0;                                                          \
                                                                            \
        while (n < nb_samples) {                                            \
            ptype gain;                                                     \
            int size;                                                       \
                                                                            \
            /* Load the next period and apply its gain to the overlap. */   \
            next_pi(ctx, cc, bypass);                                       \
            size = FFMIN(nb_samples - n, cc->pi_size);                      \
            av_assert0(size > 0);                                           \
            gain = cc->gain_state;                                          \
            consume_pi(cc, size);                                           \
            for (int i = n; i < n + size; i++)                              \
                dst[i] *= gain;                                             \
            n += size;                                                      \
        }                                                                   \
    }                                                                       \
}

FILTER_CHANNELS(dbl, double)
FILTER_CHANNELS(flt, float)
340 
/* Linear interpolation: returns min when mix == 0 and max when mix == 1. */
static double lerp(double min, double max, double mix)
{
    const double span = max - min;

    return min + span * mix;
}
345 
/* Generate the linked filtering function: all (non-bypassed) channels
 * share one gain, chosen as the minimum lookahead gain across channels
 * so that no channel can clip.  The gain is cross-faded linearly from
 * the previous block's gain (s->prev_gain) over each span of samples. */
#define FILTER_LINK_CHANNELS(name, ptype)                                   \
static void filter_link_channels_## name (AVFilterContext *ctx,             \
                                          AVFrame *in, int nb_samples)      \
{                                                                           \
    SpeechNormalizerContext *s = ctx->priv;                                 \
    AVFilterLink *inlink = ctx->inputs[0];                                  \
    int n = 0;                                                              \
                                                                            \
    while (n < nb_samples) {                                                \
        int min_size = nb_samples - n;                                      \
        int max_size = 1;                                                   \
        ptype gain = s->max_expansion;                                      \
                                                                            \
        /* Advance every channel and find the common consumable span. */    \
        for (int ch = 0; ch < inlink->channels; ch++) {                     \
            ChannelContext *cc = &s->cc[ch];                                \
                                                                            \
            cc->bypass = !(av_channel_layout_extract_channel(inlink->channel_layout, ch) & s->channels); \
                                                                            \
            next_pi(ctx, cc, cc->bypass);                                   \
            min_size = FFMIN(min_size, cc->pi_size);                        \
            max_size = FFMAX(max_size, cc->pi_size);                        \
        }                                                                   \
                                                                            \
        /* Shared gain: the most conservative lookahead gain of any channel. */ \
        av_assert0(min_size > 0);                                           \
        for (int ch = 0; ch < inlink->channels; ch++) {                     \
            ChannelContext *cc = &s->cc[ch];                                \
                                                                            \
            if (cc->bypass)                                                 \
                continue;                                                   \
            gain = FFMIN(gain, min_gain(ctx, cc, max_size));                \
        }                                                                   \
                                                                            \
        for (int ch = 0; ch < inlink->channels; ch++) {                     \
            ChannelContext *cc = &s->cc[ch];                                \
            ptype *dst = (ptype *)in->extended_data[ch];                    \
                                                                            \
            /* Consume even on bypassed channels to keep them in sync. */   \
            consume_pi(cc, min_size);                                       \
            if (cc->bypass)                                                 \
                continue;                                                   \
                                                                            \
            for (int i = n; i < n + min_size; i++) {                        \
                ptype g = lerp(s->prev_gain, gain, (i - n) / (double)min_size); \
                dst[i] *= g;                                                \
            }                                                               \
        }                                                                   \
                                                                            \
        s->prev_gain = gain;                                                \
        n += min_size;                                                      \
    }                                                                       \
}

FILTER_LINK_CHANNELS(dbl, double)
FILTER_LINK_CHANNELS(flt, float)
399 
401 {
402  SpeechNormalizerContext *s = ctx->priv;
403  AVFilterLink *outlink = ctx->outputs[0];
404  AVFilterLink *inlink = ctx->inputs[0];
405  int ret;
406 
407  while (s->queue.available > 0) {
408  int min_pi_nb_samples;
409  AVFrame *in;
410 
411  in = ff_bufqueue_peek(&s->queue, 0);
412  if (!in)
413  break;
414 
415  min_pi_nb_samples = available_samples(ctx);
416  if (min_pi_nb_samples < in->nb_samples && !s->eof)
417  break;
418 
419  in = ff_bufqueue_get(&s->queue);
420 
422 
423  s->filter_channels[s->link](ctx, in, in->nb_samples);
424 
425  s->pts = in->pts + in->nb_samples;
426 
427  return ff_filter_frame(outlink, in);
428  }
429 
430  for (int f = 0; f < ff_inlink_queued_frames(inlink); f++) {
431  AVFrame *in;
432 
434  if (ret < 0)
435  return ret;
436  if (ret == 0)
437  break;
438 
439  ff_bufqueue_add(ctx, &s->queue, in);
440 
441  for (int ch = 0; ch < inlink->channels; ch++) {
442  ChannelContext *cc = &s->cc[ch];
443 
444  s->analyze_channel(ctx, cc, in->extended_data[ch], in->nb_samples);
445  }
446  }
447 
448  return 1;
449 }
450 
452 {
453  AVFilterLink *inlink = ctx->inputs[0];
454  AVFilterLink *outlink = ctx->outputs[0];
455  SpeechNormalizerContext *s = ctx->priv;
456  int ret, status;
457  int64_t pts;
458 
460 
461  ret = filter_frame(ctx);
462  if (ret <= 0)
463  return ret;
464 
465  if (!s->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {
466  if (status == AVERROR_EOF)
467  s->eof = 1;
468  }
469 
470  if (s->eof && ff_inlink_queued_samples(inlink) == 0 &&
471  s->queue.available == 0) {
472  ff_outlink_set_status(outlink, AVERROR_EOF, s->pts);
473  return 0;
474  }
475 
476  if (s->queue.available > 0) {
477  AVFrame *in = ff_bufqueue_peek(&s->queue, 0);
478  const int nb_samples = available_samples(ctx);
479 
480  if (nb_samples >= in->nb_samples || s->eof) {
482  return 0;
483  }
484  }
485 
487 
488  return FFERROR_NOT_READY;
489 }
490 
492 {
493  AVFilterContext *ctx = inlink->dst;
494  SpeechNormalizerContext *s = ctx->priv;
495 
496  s->max_period = inlink->sample_rate / 10;
497 
498  s->prev_gain = 1.;
499  s->cc = av_calloc(inlink->channels, sizeof(*s->cc));
500  if (!s->cc)
501  return AVERROR(ENOMEM);
502 
503  for (int ch = 0; ch < inlink->channels; ch++) {
504  ChannelContext *cc = &s->cc[ch];
505 
506  cc->state = -1;
507  cc->gain_state = 1.;
508  }
509 
510  switch (inlink->format) {
511  case AV_SAMPLE_FMT_FLTP:
512  s->analyze_channel = analyze_channel_flt;
513  s->filter_channels[0] = filter_channels_flt;
514  s->filter_channels[1] = filter_link_channels_flt;
515  break;
516  case AV_SAMPLE_FMT_DBLP:
517  s->analyze_channel = analyze_channel_dbl;
518  s->filter_channels[0] = filter_channels_dbl;
519  s->filter_channels[1] = filter_link_channels_dbl;
520  break;
521  default:
522  av_assert0(0);
523  }
524 
525  return 0;
526 }
527 
528 static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
529  char *res, int res_len, int flags)
530 {
531  SpeechNormalizerContext *s = ctx->priv;
532  int link = s->link;
533  int ret;
534 
535  ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags);
536  if (ret < 0)
537  return ret;
538  if (link != s->link)
539  s->prev_gain = 1.;
540 
541  return 0;
542 }
543 
545 {
546  SpeechNormalizerContext *s = ctx->priv;
547 
548  ff_bufqueue_discard_all(&s->queue);
549  av_freep(&s->cc);
550 }
551 
/* Single audio input; config_input selects the format-specific callbacks. */
static const AVFilterPad inputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_AUDIO,
        .config_props = config_input,
    },
    { NULL }
};
560 
/* Single audio output; no special configuration required. */
static const AVFilterPad outputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_AUDIO,
    },
    { NULL }
};
568 
570  .name = "speechnorm",
571  .description = NULL_IF_CONFIG_SMALL("Speech Normalizer."),
572  .query_formats = query_formats,
573  .priv_size = sizeof(SpeechNormalizerContext),
574  .priv_class = &speechnorm_class,
575  .activate = activate,
576  .uninit = uninit,
577  .inputs = inputs,
578  .outputs = outputs,
580 };
formats
formats
Definition: signature.h:48
inputs
static const AVFilterPad inputs[]
Definition: af_speechnorm.c:552
AV_SAMPLE_FMT_FLTP
@ AV_SAMPLE_FMT_FLTP
float, planar
Definition: samplefmt.h:69
AVFilterChannelLayouts
A list of supported channel layouts.
Definition: formats.h:85
get_pi_samples
static int get_pi_samples(PeriodItem *pi, int start, int end, int remain)
Definition: af_speechnorm.c:147
status
they must not be accessed directly The fifo field contains the frames that are queued in the input for processing by the filter The status_in and status_out fields contains the queued status(EOF or error) of the link
mix
static int mix(int c0, int c1)
Definition: 4xm.c:716
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
ff_make_format_list
AVFilterFormats * ff_make_format_list(const int *fmts)
Create a list of supported formats.
Definition: formats.c:286
consume_pi
static void consume_pi(ChannelContext *cc, int nb_samples)
Definition: af_speechnorm.c:184
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:978
sample_fmts
static enum AVSampleFormat sample_fmts[]
Definition: adpcmenc.c:948
PeriodItem::type
int type
Definition: af_speechnorm.c:47
layouts
enum MovChannelLayoutTag * layouts
Definition: mov_chan.c:434
AVERROR_EOF
#define AVERROR_EOF
End of file.
Definition: error.h:55
FFERROR_NOT_READY
return FFERROR_NOT_READY
Definition: filter_design.txt:204
process_command
static int process_command(AVFilterContext *ctx, const char *cmd, const char *args, char *res, int res_len, int flags)
Definition: af_speechnorm.c:528
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
ff_all_channel_counts
AVFilterChannelLayouts * ff_all_channel_counts(void)
Construct an AVFilterChannelLayouts coding for any channel layout, with known or unknown disposition.
Definition: formats.c:429
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:303
av_frame_make_writable
int av_frame_make_writable(AVFrame *frame)
Ensure that the frame data is writable, avoiding data copy if possible.
Definition: frame.c:490
AVOption
AVOption.
Definition: opt.h:247
PeriodItem
Definition: af_speechnorm.c:45
SpeechNormalizerContext::peak_value
double peak_value
Definition: af_speechnorm.c:65
float.h
max
#define max(a, b)
Definition: cuda_runtime.h:33
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:149
ChannelContext::gain_state
double gain_state
Definition: af_speechnorm.c:55
FF_FILTER_FORWARD_STATUS_BACK
#define FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink)
Forward the status on an output link to an input link.
Definition: filters.h:199
SpeechNormalizerContext::link
int link
Definition: af_speechnorm.c:73
MAX_ITEMS
#define MAX_ITEMS
Definition: af_speechnorm.c:42
ff_bufqueue_get
static AVFrame * ff_bufqueue_get(struct FFBufQueue *queue)
Get the first buffer from the queue and remove it.
Definition: bufferqueue.h:98
SpeechNormalizerContext::threshold_value
double threshold_value
Definition: af_speechnorm.c:68
AVFilterFormats
A list of supported formats for one end of a filter link.
Definition: formats.h:64
ff_inlink_consume_frame
int ff_inlink_consume_frame(AVFilterLink *link, AVFrame **rframe)
Take a frame from the link's FIFO and update the link's stats.
Definition: avfilter.c:1376
type
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf type
Definition: writing_filters.txt:86
pts
static int64_t pts
Definition: transcode_aac.c:653
AVFILTER_DEFINE_CLASS
AVFILTER_DEFINE_CLASS(speechnorm)
AVFilterPad
A filter pad used for either input or output.
Definition: internal.h:54
avassert.h
av_cold
#define av_cold
Definition: attributes.h:90
ff_set_common_formats
int ff_set_common_formats(AVFilterContext *ctx, AVFilterFormats *formats)
A helper for query_formats() which sets all links to the same list of formats.
Definition: formats.c:580
SpeechNormalizerContext::eof
int eof
Definition: af_speechnorm.c:79
ff_outlink_set_status
static void ff_outlink_set_status(AVFilterLink *link, int status, int64_t pts)
Set the status field of a link from the source filter.
Definition: filters.h:189
s
#define s(width, name)
Definition: cbs_vp9.c:257
available_samples
static int available_samples(AVFilterContext *ctx)
Definition: af_speechnorm.c:168
SpeechNormalizerContext::raise_amount
double raise_amount
Definition: af_speechnorm.c:69
AV_OPT_TYPE_DOUBLE
@ AV_OPT_TYPE_DOUBLE
Definition: opt.h:226
AVMEDIA_TYPE_AUDIO
@ AVMEDIA_TYPE_AUDIO
Definition: avutil.h:202
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
filters.h
ctx
AVFormatContext * ctx
Definition: movenc.c:48
channels
channels
Definition: aptx.h:33
SpeechNormalizerContext::cc
ChannelContext * cc
Definition: af_speechnorm.c:75
outputs
static const AVFilterPad outputs[]
Definition: af_speechnorm.c:561
f
#define f(width, name)
Definition: cbs_vp9.c:255
link
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a link
Definition: filter_design.txt:23
FILTER_LINK_CHANNELS
#define FILTER_LINK_CHANNELS(name, ptype)
Definition: af_speechnorm.c:346
min_gain
static double min_gain(AVFilterContext *ctx, ChannelContext *cc, int max_size)
Definition: af_speechnorm.c:229
AVClass
Describe the class of an AVClass context structure.
Definition: log.h:66
NULL
#define NULL
Definition: coverity.c:32
ANALYZE_CHANNEL
#define ANALYZE_CHANNEL(name, ptype, zero)
Definition: af_speechnorm.c:252
state
static struct @317 state
ff_bufqueue_discard_all
static void ff_bufqueue_discard_all(struct FFBufQueue *queue)
Unref and remove all buffers from the queue.
Definition: bufferqueue.h:111
ChannelContext::pi_max_peak
double pi_max_peak
Definition: af_speechnorm.c:56
PeriodItem::size
int size
Definition: af_speechnorm.c:46
ChannelContext::pi_end
int pi_end
Definition: af_speechnorm.c:58
ff_inlink_acknowledge_status
int ff_inlink_acknowledge_status(AVFilterLink *link, int *rstatus, int64_t *rpts)
Test and acknowledge the change of status on the link.
Definition: avfilter.c:1331
ff_inlink_queued_frames
size_t ff_inlink_queued_frames(AVFilterLink *link)
Get the number of frames available on the link.
Definition: avfilter.c:1346
bufferqueue.h
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:116
FFMAX
#define FFMAX(a, b)
Definition: common.h:103
AV_SAMPLE_FMT_NONE
@ AV_SAMPLE_FMT_NONE
Definition: samplefmt.h:59
size
int size
Definition: twinvq_data.h:10344
SpeechNormalizerContext::max_period
int max_period
Definition: af_speechnorm.c:78
AV_OPT_TYPE_CHANNEL_LAYOUT
@ AV_OPT_TYPE_CHANNEL_LAYOUT
Definition: opt.h:240
ff_filter_process_command
int ff_filter_process_command(AVFilterContext *ctx, const char *cmd, const char *arg, char *res, int res_len, int flags)
Generic processing of user supplied commands that are set in the same way as the filter options.
Definition: avfilter.c:842
FFMIN
#define FFMIN(a, b)
Definition: common.h:105
FF_FILTER_FORWARD_WANTED
FF_FILTER_FORWARD_WANTED(outlink, inlink)
ff_bufqueue_add
static void ff_bufqueue_add(void *log, struct FFBufQueue *queue, AVFrame *buf)
Add a buffer to the queue.
Definition: bufferqueue.h:71
FILTER_CHANNELS
#define FILTER_CHANNELS(name, ptype)
Definition: af_speechnorm.c:309
next_gain
static double next_gain(AVFilterContext *ctx, double pi_max_peak, int bypass, double state)
Definition: af_speechnorm.c:193
SpeechNormalizerContext
Definition: af_speechnorm.c:62
internal.h
AVFrame::nb_samples
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:369
ff_bufqueue_peek
static AVFrame * ff_bufqueue_peek(struct FFBufQueue *queue, unsigned index)
Get a buffer from the queue without altering it.
Definition: bufferqueue.h:87
FFBufQueue
Structure holding the queue.
Definition: bufferqueue.h:49
invert
static void invert(float *h, int n)
Definition: asrc_sinc.c:202
AVFrame::extended_data
uint8_t ** extended_data
pointers to the data planes/channels.
Definition: frame.h:350
AVSampleFormat
AVSampleFormat
Audio sample formats.
Definition: samplefmt.h:58
FLAGS
#define FLAGS
Definition: af_speechnorm.c:91
SpeechNormalizerContext::invert
int invert
Definition: af_speechnorm.c:72
SpeechNormalizerContext::filter_channels
void(* filter_channels[2])(AVFilterContext *ctx, AVFrame *in, int nb_samples)
Definition: af_speechnorm.c:86
SpeechNormalizerContext::queue
struct FFBufQueue queue
Definition: af_speechnorm.c:82
AVFilterPad::name
const char * name
Pad name.
Definition: internal.h:60
ff_inlink_queued_samples
int ff_inlink_queued_samples(AVFilterLink *link)
Definition: avfilter.c:1356
ChannelContext::pi_size
int pi_size
Definition: af_speechnorm.c:59
SpeechNormalizerContext::fall_amount
double fall_amount
Definition: af_speechnorm.c:70
activate
static int activate(AVFilterContext *ctx)
Definition: af_speechnorm.c:451
AVFilter
Filter definition.
Definition: avfilter.h:145
ret
ret
Definition: filter_design.txt:187
PeriodItem::max_peak
double max_peak
Definition: af_speechnorm.c:48
ChannelContext::bypass
int bypass
Definition: af_speechnorm.c:53
uninit
static av_cold void uninit(AVFilterContext *ctx)
Definition: af_speechnorm.c:544
ff_all_samplerates
AVFilterFormats * ff_all_samplerates(void)
Definition: formats.c:414
config_input
static int config_input(AVFilterLink *inlink)
Definition: af_speechnorm.c:491
av_calloc
void * av_calloc(size_t nmemb, size_t size)
Non-inlined equivalent of av_mallocz_array().
Definition: mem.c:251
channel_layout.h
next_pi
static void next_pi(AVFilterContext *ctx, ChannelContext *cc, int bypass)
Definition: af_speechnorm.c:209
avfilter.h
ChannelContext::pi_start
int pi_start
Definition: af_speechnorm.c:57
AV_SAMPLE_FMT_DBLP
@ AV_SAMPLE_FMT_DBLP
double, planar
Definition: samplefmt.h:70
ChannelContext
Definition: hcadec.c:30
ChannelContext::state
int state
Definition: af_speechnorm.c:52
SpeechNormalizerContext::max_compression
double max_compression
Definition: af_speechnorm.c:67
ChannelContext::pi
PeriodItem pi[MAX_ITEMS]
Definition: af_speechnorm.c:54
AVFilterContext
An instance of a filter.
Definition: avfilter.h:333
filter_frame
static int filter_frame(AVFilterContext *ctx)
Definition: af_speechnorm.c:400
audio.h
SpeechNormalizerContext::channels
uint64_t channels
Definition: af_speechnorm.c:71
query_formats
static int query_formats(AVFilterContext *ctx)
Definition: af_speechnorm.c:117
lerp
static double lerp(double min, double max, double mix)
Definition: af_speechnorm.c:341
AV_OPT_TYPE_BOOL
@ AV_OPT_TYPE_BOOL
Definition: opt.h:241
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:35
OFFSET
#define OFFSET(x)
Definition: af_speechnorm.c:90
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:561
ff_set_common_samplerates
int ff_set_common_samplerates(AVFilterContext *ctx, AVFilterFormats *samplerates)
Definition: formats.c:568
SpeechNormalizerContext::pts
int64_t pts
Definition: af_speechnorm.c:80
SpeechNormalizerContext::max_expansion
double max_expansion
Definition: af_speechnorm.c:66
speechnorm_options
static const AVOption speechnorm_options[]
Definition: af_speechnorm.c:93
SpeechNormalizerContext::analyze_channel
void(* analyze_channel)(AVFilterContext *ctx, ChannelContext *cc, const uint8_t *srcp, int nb_samples)
Definition: af_speechnorm.c:84
ff_af_speechnorm
const AVFilter ff_af_speechnorm
Definition: af_speechnorm.c:569
ff_filter_set_ready
void ff_filter_set_ready(AVFilterContext *filter, unsigned priority)
Mark a filter ready and schedule it for activation.
Definition: avfilter.c:186
ff_set_common_channel_layouts
int ff_set_common_channel_layouts(AVFilterContext *ctx, AVFilterChannelLayouts *channel_layouts)
A helper for query_formats() which sets all links to the same list of channel layouts/sample rates.
Definition: formats.c:561
min
float min
Definition: vorbis_enc_data.h:429
SpeechNormalizerContext::prev_gain
double prev_gain
Definition: af_speechnorm.c:76