FFmpeg
af_speechnorm.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020 Paul B Mahol
3  *
4  * Speech Normalizer
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 /**
24  * @file
25  * Speech Normalizer
26  */
27 
#include <float.h>

#include "libavutil/avassert.h"
#include "libavutil/channel_layout.h"
#include "libavutil/opt.h"

#define FF_BUFQUEUE_SIZE (1024)
#include "bufferqueue.h"

#include "audio.h"
#include "avfilter.h"
#include "filters.h"
#include "internal.h"
41 
#define MAX_ITEMS  882000
#define MIN_PEAK (1. / 32768.)

/* One half-wave period: a run of samples on one side of zero. */
typedef struct PeriodItem {
    int size;        // number of samples in this period
    int type;        // 0 while still being filled, 1 once closed
    double max_peak; // absolute peak seen inside the period
} PeriodItem;

typedef struct ChannelContext {
    int state;                 // current sign of the signal (-1 = not yet known)
    int bypass;                // channel excluded from filtering (linked mode)
    PeriodItem pi[MAX_ITEMS];  // ring buffer of analyzed periods
    double gain_state;         // smoothed gain carried between periods
    double pi_max_peak;        // peak of the period currently being consumed
    int pi_start;              // ring read index
    int pi_end;                // ring write index
    int pi_size;               // samples left in the current period
} ChannelContext;
61 
62 typedef struct SpeechNormalizerContext {
63  const AVClass *class;
64 
65  double peak_value;
66  double max_expansion;
69  double raise_amount;
70  double fall_amount;
71  uint64_t channels;
72  int invert;
73  int link;
74 
76  double prev_gain;
77 
79  int eof;
80  int64_t pts;
81 
82  struct FFBufQueue queue;
83 
85  const uint8_t *srcp, int nb_samples);
87  AVFrame *in, AVFrame *out, int nb_samples);
89 
90 #define OFFSET(x) offsetof(SpeechNormalizerContext, x)
91 #define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
92 
93 static const AVOption speechnorm_options[] = {
94  { "peak", "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0.0, 1.0, FLAGS },
95  { "p", "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0.0, 1.0, FLAGS },
96  { "expansion", "set the max expansion factor", OFFSET(max_expansion), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
97  { "e", "set the max expansion factor", OFFSET(max_expansion), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
98  { "compression", "set the max compression factor", OFFSET(max_compression), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
99  { "c", "set the max compression factor", OFFSET(max_compression), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
100  { "threshold", "set the threshold value", OFFSET(threshold_value), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0.0, 1.0, FLAGS },
101  { "t", "set the threshold value", OFFSET(threshold_value), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0.0, 1.0, FLAGS },
102  { "raise", "set the expansion raising amount", OFFSET(raise_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
103  { "r", "set the expansion raising amount", OFFSET(raise_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
104  { "fall", "set the compression raising amount", OFFSET(fall_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
105  { "f", "set the compression raising amount", OFFSET(fall_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
106  { "channels", "set channels to filter", OFFSET(channels), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=-1}, INT64_MIN, INT64_MAX, FLAGS },
107  { "h", "set channels to filter", OFFSET(channels), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=-1}, INT64_MIN, INT64_MAX, FLAGS },
108  { "invert", "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
109  { "i", "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
110  { "link", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
111  { "l", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
112  { NULL }
113 };
114 
115 AVFILTER_DEFINE_CLASS(speechnorm);
116 
117 static int get_pi_samples(PeriodItem *pi, int start, int end, int remain)
118 {
119  int sum;
120 
121  if (pi[start].type == 0)
122  return remain;
123 
124  sum = remain;
125  while (start != end) {
126  start++;
127  if (start >= MAX_ITEMS)
128  start = 0;
129  if (pi[start].type == 0)
130  break;
131  av_assert0(pi[start].size > 0);
132  sum += pi[start].size;
133  }
134 
135  return sum;
136 }
137 
139 {
140  SpeechNormalizerContext *s = ctx->priv;
141  AVFilterLink *inlink = ctx->inputs[0];
142  int min_pi_nb_samples;
143 
144  min_pi_nb_samples = get_pi_samples(s->cc[0].pi, s->cc[0].pi_start, s->cc[0].pi_end, s->cc[0].pi_size);
145  for (int ch = 1; ch < inlink->channels && min_pi_nb_samples > 0; ch++) {
146  ChannelContext *cc = &s->cc[ch];
147 
148  min_pi_nb_samples = FFMIN(min_pi_nb_samples, get_pi_samples(cc->pi, cc->pi_start, cc->pi_end, cc->pi_size));
149  }
150 
151  return min_pi_nb_samples;
152 }
153 
154 static void consume_pi(ChannelContext *cc, int nb_samples)
155 {
156  if (cc->pi_size >= nb_samples) {
157  cc->pi_size -= nb_samples;
158  } else {
159  av_assert0(0);
160  }
161 }
162 
163 static double next_gain(AVFilterContext *ctx, double pi_max_peak, int bypass, double state)
164 {
165  SpeechNormalizerContext *s = ctx->priv;
166  const double expansion = FFMIN(s->max_expansion, s->peak_value / pi_max_peak);
167  const double compression = 1. / s->max_compression;
168  const int type = s->invert ? pi_max_peak <= s->threshold_value : pi_max_peak >= s->threshold_value;
169 
170  if (bypass) {
171  return 1.;
172  } else if (type) {
173  return FFMIN(expansion, state + s->raise_amount);
174  } else {
175  return FFMIN(expansion, FFMAX(compression, state - s->fall_amount));
176  }
177 }
178 
179 static void next_pi(AVFilterContext *ctx, ChannelContext *cc, int bypass)
180 {
181  av_assert0(cc->pi_size >= 0);
182  if (cc->pi_size == 0) {
183  SpeechNormalizerContext *s = ctx->priv;
184  int start = cc->pi_start;
185 
186  av_assert0(cc->pi[start].size > 0);
187  av_assert0(cc->pi[start].type > 0 || s->eof);
188  cc->pi_size = cc->pi[start].size;
189  cc->pi_max_peak = cc->pi[start].max_peak;
190  av_assert0(cc->pi_start != cc->pi_end || s->eof);
191  start++;
192  if (start >= MAX_ITEMS)
193  start = 0;
194  cc->pi_start = start;
195  cc->gain_state = next_gain(ctx, cc->pi_max_peak, bypass, cc->gain_state);
196  }
197 }
198 
199 static double min_gain(AVFilterContext *ctx, ChannelContext *cc, int max_size)
200 {
201  SpeechNormalizerContext *s = ctx->priv;
202  double min_gain = s->max_expansion;
203  double gain_state = cc->gain_state;
204  int size = cc->pi_size;
205  int idx = cc->pi_start;
206 
207  min_gain = FFMIN(min_gain, gain_state);
208  while (size <= max_size) {
209  if (idx == cc->pi_end)
210  break;
211  gain_state = next_gain(ctx, cc->pi[idx].max_peak, 0, gain_state);
212  min_gain = FFMIN(min_gain, gain_state);
213  size += cc->pi[idx].size;
214  idx++;
215  if (idx >= MAX_ITEMS)
216  idx = 0;
217  }
218 
219  return min_gain;
220 }
221 
222 #define ANALYZE_CHANNEL(name, ptype, zero, min_peak) \
223 static void analyze_channel_## name (AVFilterContext *ctx, ChannelContext *cc, \
224  const uint8_t *srcp, int nb_samples) \
225 { \
226  SpeechNormalizerContext *s = ctx->priv; \
227  const ptype *src = (const ptype *)srcp; \
228  int n = 0; \
229  \
230  if (cc->state < 0) \
231  cc->state = src[0] >= zero; \
232  \
233  while (n < nb_samples) { \
234  if ((cc->state != (src[n] >= zero)) || \
235  (cc->pi[cc->pi_end].size > s->max_period)) { \
236  ptype max_peak = cc->pi[cc->pi_end].max_peak; \
237  int state = cc->state; \
238  cc->state = src[n] >= zero; \
239  av_assert0(cc->pi[cc->pi_end].size > 0); \
240  if (max_peak >= min_peak || \
241  cc->pi[cc->pi_end].size > s->max_period) { \
242  cc->pi[cc->pi_end].type = 1; \
243  cc->pi_end++; \
244  if (cc->pi_end >= MAX_ITEMS) \
245  cc->pi_end = 0; \
246  if (cc->state != state) \
247  cc->pi[cc->pi_end].max_peak = DBL_MIN; \
248  else \
249  cc->pi[cc->pi_end].max_peak = max_peak; \
250  cc->pi[cc->pi_end].type = 0; \
251  cc->pi[cc->pi_end].size = 0; \
252  av_assert0(cc->pi_end != cc->pi_start); \
253  } \
254  } \
255  \
256  if (cc->state) { \
257  while (src[n] >= zero) { \
258  cc->pi[cc->pi_end].max_peak = FFMAX(cc->pi[cc->pi_end].max_peak, src[n]); \
259  cc->pi[cc->pi_end].size++; \
260  n++; \
261  if (n >= nb_samples) \
262  break; \
263  } \
264  } else { \
265  while (src[n] < zero) { \
266  cc->pi[cc->pi_end].max_peak = FFMAX(cc->pi[cc->pi_end].max_peak, -src[n]); \
267  cc->pi[cc->pi_end].size++; \
268  n++; \
269  if (n >= nb_samples) \
270  break; \
271  } \
272  } \
273  } \
274 }
275 
276 ANALYZE_CHANNEL(dbl, double, 0.0, MIN_PEAK)
277 ANALYZE_CHANNEL(flt, float, 0.f, (float)MIN_PEAK)
278 
279 #define FILTER_CHANNELS(name, ptype) \
280 static void filter_channels_## name (AVFilterContext *ctx, \
281  AVFrame *in, AVFrame *out, int nb_samples) \
282 { \
283  SpeechNormalizerContext *s = ctx->priv; \
284  AVFilterLink *inlink = ctx->inputs[0]; \
285  \
286  for (int ch = 0; ch < inlink->channels; ch++) { \
287  ChannelContext *cc = &s->cc[ch]; \
288  const ptype *src = (const ptype *)in->extended_data[ch]; \
289  ptype *dst = (ptype *)out->extended_data[ch]; \
290  const int bypass = !(av_channel_layout_extract_channel(inlink->channel_layout, ch) & s->channels); \
291  int n = 0; \
292  \
293  while (n < nb_samples) { \
294  ptype gain; \
295  int size; \
296  \
297  next_pi(ctx, cc, bypass); \
298  size = FFMIN(nb_samples - n, cc->pi_size); \
299  av_assert0(size > 0); \
300  gain = cc->gain_state; \
301  consume_pi(cc, size); \
302  for (int i = n; !ctx->is_disabled && i < n + size; i++) \
303  dst[i] = src[i] * gain; \
304  n += size; \
305  } \
306  } \
307 }
308 
309 FILTER_CHANNELS(dbl, double)
310 FILTER_CHANNELS(flt, float)
311 
/* Linear interpolation between min and max by fraction mix (double). */
static double dlerp(double min, double max, double mix)
{
    return min + (max - min) * mix;
}
316 
/* Linear interpolation between min and max by fraction mix (float). */
static float flerp(float min, float max, float mix)
{
    return min + (max - min) * mix;
}
321 
322 #define FILTER_LINK_CHANNELS(name, ptype, tlerp) \
323 static void filter_link_channels_## name (AVFilterContext *ctx, \
324  AVFrame *in, AVFrame *out, \
325  int nb_samples) \
326 { \
327  SpeechNormalizerContext *s = ctx->priv; \
328  AVFilterLink *inlink = ctx->inputs[0]; \
329  int n = 0; \
330  \
331  while (n < nb_samples) { \
332  int min_size = nb_samples - n; \
333  int max_size = 1; \
334  ptype gain = s->max_expansion; \
335  \
336  for (int ch = 0; ch < inlink->channels; ch++) { \
337  ChannelContext *cc = &s->cc[ch]; \
338  \
339  cc->bypass = !(av_channel_layout_extract_channel(inlink->channel_layout, ch) & s->channels); \
340  \
341  next_pi(ctx, cc, cc->bypass); \
342  min_size = FFMIN(min_size, cc->pi_size); \
343  max_size = FFMAX(max_size, cc->pi_size); \
344  } \
345  \
346  av_assert0(min_size > 0); \
347  for (int ch = 0; ch < inlink->channels; ch++) { \
348  ChannelContext *cc = &s->cc[ch]; \
349  \
350  if (cc->bypass) \
351  continue; \
352  gain = FFMIN(gain, min_gain(ctx, cc, max_size)); \
353  } \
354  \
355  for (int ch = 0; ch < inlink->channels; ch++) { \
356  ChannelContext *cc = &s->cc[ch]; \
357  const ptype *src = (const ptype *)in->extended_data[ch]; \
358  ptype *dst = (ptype *)out->extended_data[ch]; \
359  \
360  consume_pi(cc, min_size); \
361  if (cc->bypass) \
362  continue; \
363  \
364  for (int i = n; !ctx->is_disabled && i < n + min_size; i++) { \
365  ptype g = tlerp(s->prev_gain, gain, (i - n) / (ptype)min_size); \
366  dst[i] = src[i] * g; \
367  } \
368  } \
369  \
370  s->prev_gain = gain; \
371  n += min_size; \
372  } \
373 }
374 
375 FILTER_LINK_CHANNELS(dbl, double, dlerp)
376 FILTER_LINK_CHANNELS(flt, float, flerp)
377 
379 {
380  SpeechNormalizerContext *s = ctx->priv;
381  AVFilterLink *outlink = ctx->outputs[0];
382  AVFilterLink *inlink = ctx->inputs[0];
383  int ret;
384 
385  while (s->queue.available > 0) {
386  int min_pi_nb_samples;
387  AVFrame *in, *out;
388 
389  in = ff_bufqueue_peek(&s->queue, 0);
390  if (!in)
391  break;
392 
393  min_pi_nb_samples = available_samples(ctx);
394  if (min_pi_nb_samples < in->nb_samples && !s->eof)
395  break;
396 
397  in = ff_bufqueue_get(&s->queue);
398 
399  if (av_frame_is_writable(in)) {
400  out = in;
401  } else {
402  out = ff_get_audio_buffer(outlink, in->nb_samples);
403  if (!out) {
404  av_frame_free(&in);
405  return AVERROR(ENOMEM);
406  }
408  }
409 
410  s->filter_channels[s->link](ctx, in, out, in->nb_samples);
411 
412  s->pts = in->pts + av_rescale_q(in->nb_samples, av_make_q(1, outlink->sample_rate),
413  outlink->time_base);
414 
415  if (out != in)
416  av_frame_free(&in);
417  return ff_filter_frame(outlink, out);
418  }
419 
420  for (int f = 0; f < ff_inlink_queued_frames(inlink); f++) {
421  AVFrame *in;
422 
424  if (ret < 0)
425  return ret;
426  if (ret == 0)
427  break;
428 
429  ff_bufqueue_add(ctx, &s->queue, in);
430 
431  for (int ch = 0; ch < inlink->channels; ch++) {
432  ChannelContext *cc = &s->cc[ch];
433 
434  s->analyze_channel(ctx, cc, in->extended_data[ch], in->nb_samples);
435  }
436  }
437 
438  return 1;
439 }
440 
442 {
443  AVFilterLink *inlink = ctx->inputs[0];
444  AVFilterLink *outlink = ctx->outputs[0];
445  SpeechNormalizerContext *s = ctx->priv;
446  int ret, status;
447  int64_t pts;
448 
450 
451  ret = filter_frame(ctx);
452  if (ret <= 0)
453  return ret;
454 
455  if (!s->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {
456  if (status == AVERROR_EOF)
457  s->eof = 1;
458  }
459 
460  if (s->eof && ff_inlink_queued_samples(inlink) == 0 &&
461  s->queue.available == 0) {
462  ff_outlink_set_status(outlink, AVERROR_EOF, s->pts);
463  return 0;
464  }
465 
466  if (s->queue.available > 0) {
467  AVFrame *in = ff_bufqueue_peek(&s->queue, 0);
468  const int nb_samples = available_samples(ctx);
469 
470  if (nb_samples >= in->nb_samples || s->eof) {
472  return 0;
473  }
474  }
475 
477 
478  return FFERROR_NOT_READY;
479 }
480 
482 {
483  AVFilterContext *ctx = inlink->dst;
484  SpeechNormalizerContext *s = ctx->priv;
485 
486  s->max_period = inlink->sample_rate / 10;
487 
488  s->prev_gain = 1.;
489  s->cc = av_calloc(inlink->channels, sizeof(*s->cc));
490  if (!s->cc)
491  return AVERROR(ENOMEM);
492 
493  for (int ch = 0; ch < inlink->channels; ch++) {
494  ChannelContext *cc = &s->cc[ch];
495 
496  cc->state = -1;
497  cc->gain_state = 1.;
498  }
499 
500  switch (inlink->format) {
501  case AV_SAMPLE_FMT_FLTP:
502  s->analyze_channel = analyze_channel_flt;
503  s->filter_channels[0] = filter_channels_flt;
504  s->filter_channels[1] = filter_link_channels_flt;
505  break;
506  case AV_SAMPLE_FMT_DBLP:
507  s->analyze_channel = analyze_channel_dbl;
508  s->filter_channels[0] = filter_channels_dbl;
509  s->filter_channels[1] = filter_link_channels_dbl;
510  break;
511  default:
512  av_assert0(0);
513  }
514 
515  return 0;
516 }
517 
518 static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
519  char *res, int res_len, int flags)
520 {
521  SpeechNormalizerContext *s = ctx->priv;
522  int link = s->link;
523  int ret;
524 
525  ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags);
526  if (ret < 0)
527  return ret;
528  if (link != s->link)
529  s->prev_gain = 1.;
530 
531  return 0;
532 }
533 
535 {
536  SpeechNormalizerContext *s = ctx->priv;
537 
538  ff_bufqueue_discard_all(&s->queue);
539  av_freep(&s->cc);
540 }
541 
542 static const AVFilterPad inputs[] = {
543  {
544  .name = "default",
545  .type = AVMEDIA_TYPE_AUDIO,
546  .config_props = config_input,
547  },
548 };
549 
550 static const AVFilterPad outputs[] = {
551  {
552  .name = "default",
553  .type = AVMEDIA_TYPE_AUDIO,
554  },
555 };
556 
558  .name = "speechnorm",
559  .description = NULL_IF_CONFIG_SMALL("Speech Normalizer."),
560  .priv_size = sizeof(SpeechNormalizerContext),
561  .priv_class = &speechnorm_class,
562  .activate = activate,
563  .uninit = uninit,
568  .process_command = process_command,
569 };
inputs
static const AVFilterPad inputs[]
Definition: af_speechnorm.c:542
ff_get_audio_buffer
AVFrame * ff_get_audio_buffer(AVFilterLink *link, int nb_samples)
Request an audio samples buffer with a specific set of permissions.
Definition: audio.c:88
AV_SAMPLE_FMT_FLTP
@ AV_SAMPLE_FMT_FLTP
float, planar
Definition: samplefmt.h:69
get_pi_samples
static int get_pi_samples(PeriodItem *pi, int start, int end, int remain)
Definition: af_speechnorm.c:117
status
they must not be accessed directly The fifo field contains the frames that are queued in the input for processing by the filter The status_in and status_out fields contains the queued status(EOF or error) of the link
mix
static int mix(int c0, int c1)
Definition: 4xm.c:716
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
out
FILE * out
Definition: movenc.c:54
consume_pi
static void consume_pi(ChannelContext *cc, int nb_samples)
Definition: af_speechnorm.c:154
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1018
PeriodItem::type
int type
Definition: af_speechnorm.c:47
AVERROR_EOF
#define AVERROR_EOF
End of file.
Definition: error.h:57
FFERROR_NOT_READY
return FFERROR_NOT_READY
Definition: filter_design.txt:204
process_command
static int process_command(AVFilterContext *ctx, const char *cmd, const char *args, char *res, int res_len, int flags)
Definition: af_speechnorm.c:518
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:109
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:317
AVOption
AVOption.
Definition: opt.h:247
PeriodItem
Definition: af_speechnorm.c:45
SpeechNormalizerContext::peak_value
double peak_value
Definition: af_speechnorm.c:65
float.h
SpeechNormalizerContext::filter_channels
void(* filter_channels[2])(AVFilterContext *ctx, AVFrame *in, AVFrame *out, int nb_samples)
Definition: af_speechnorm.c:86
max
#define max(a, b)
Definition: cuda_runtime.h:33
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:169
ChannelContext::gain_state
double gain_state
Definition: af_speechnorm.c:55
FF_FILTER_FORWARD_STATUS_BACK
#define FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink)
Forward the status on an output link to an input link.
Definition: filters.h:199
SpeechNormalizerContext::link
int link
Definition: af_speechnorm.c:73
MAX_ITEMS
#define MAX_ITEMS
Definition: af_speechnorm.c:42
ff_bufqueue_get
static AVFrame * ff_bufqueue_get(struct FFBufQueue *queue)
Get the first buffer from the queue and remove it.
Definition: bufferqueue.h:98
SpeechNormalizerContext::threshold_value
double threshold_value
Definition: af_speechnorm.c:68
ff_inlink_consume_frame
int ff_inlink_consume_frame(AVFilterLink *link, AVFrame **rframe)
Take a frame from the link's FIFO and update the link's stats.
Definition: avfilter.c:1417
dlerp
static double dlerp(double min, double max, double mix)
Definition: af_speechnorm.c:312
type
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf type
Definition: writing_filters.txt:86
pts
static int64_t pts
Definition: transcode_aac.c:653
AVFILTER_DEFINE_CLASS
AVFILTER_DEFINE_CLASS(speechnorm)
AVFilterPad
A filter pad used for either input or output.
Definition: internal.h:50
avassert.h
av_cold
#define av_cold
Definition: attributes.h:90
SpeechNormalizerContext::eof
int eof
Definition: af_speechnorm.c:79
ff_outlink_set_status
static void ff_outlink_set_status(AVFilterLink *link, int status, int64_t pts)
Set the status field of a link from the source filter.
Definition: filters.h:189
s
#define s(width, name)
Definition: cbs_vp9.c:257
available_samples
static int available_samples(AVFilterContext *ctx)
Definition: af_speechnorm.c:138
SpeechNormalizerContext::raise_amount
double raise_amount
Definition: af_speechnorm.c:69
AV_OPT_TYPE_DOUBLE
@ AV_OPT_TYPE_DOUBLE
Definition: opt.h:226
AVMEDIA_TYPE_AUDIO
@ AVMEDIA_TYPE_AUDIO
Definition: avutil.h:202
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
filters.h
ctx
AVFormatContext * ctx
Definition: movenc.c:48
channels
channels
Definition: aptx.h:33
av_rescale_q
int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq)
Rescale a 64-bit integer by 2 rational numbers.
Definition: mathematics.c:141
SpeechNormalizerContext::cc
ChannelContext * cc
Definition: af_speechnorm.c:75
outputs
static const AVFilterPad outputs[]
Definition: af_speechnorm.c:550
f
#define f(width, name)
Definition: cbs_vp9.c:255
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: internal.h:191
link
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a link
Definition: filter_design.txt:23
min_gain
static double min_gain(AVFilterContext *ctx, ChannelContext *cc, int max_size)
Definition: af_speechnorm.c:199
AVClass
Describe the class of an AVClass context structure.
Definition: log.h:66
NULL
#define NULL
Definition: coverity.c:32
av_frame_copy_props
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:537
ff_bufqueue_discard_all
static void ff_bufqueue_discard_all(struct FFBufQueue *queue)
Unref and remove all buffers from the queue.
Definition: bufferqueue.h:111
ChannelContext::pi_max_peak
double pi_max_peak
Definition: af_speechnorm.c:56
PeriodItem::size
int size
Definition: af_speechnorm.c:46
ChannelContext::pi_end
int pi_end
Definition: af_speechnorm.c:58
ff_inlink_acknowledge_status
int ff_inlink_acknowledge_status(AVFilterLink *link, int *rstatus, int64_t *rpts)
Test and acknowledge the change of status on the link.
Definition: avfilter.c:1371
ff_inlink_queued_frames
size_t ff_inlink_queued_frames(AVFilterLink *link)
Get the number of frames available on the link.
Definition: avfilter.c:1386
bufferqueue.h
state
static struct @320 state
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:117
size
int size
Definition: twinvq_data.h:10344
av_make_q
static AVRational av_make_q(int num, int den)
Create an AVRational.
Definition: rational.h:71
av_frame_is_writable
int av_frame_is_writable(AVFrame *frame)
Check if the frame data is writable.
Definition: frame.c:473
SpeechNormalizerContext::max_period
int max_period
Definition: af_speechnorm.c:78
AV_OPT_TYPE_CHANNEL_LAYOUT
@ AV_OPT_TYPE_CHANNEL_LAYOUT
Definition: opt.h:240
ff_filter_process_command
int ff_filter_process_command(AVFilterContext *ctx, const char *cmd, const char *arg, char *res, int res_len, int flags)
Generic processing of user supplied commands that are set in the same way as the filter options.
Definition: avfilter.c:882
FF_FILTER_FORWARD_WANTED
FF_FILTER_FORWARD_WANTED(outlink, inlink)
ff_bufqueue_add
static void ff_bufqueue_add(void *log, struct FFBufQueue *queue, AVFrame *buf)
Add a buffer to the queue.
Definition: bufferqueue.h:71
FILTER_CHANNELS
#define FILTER_CHANNELS(name, ptype)
Definition: af_speechnorm.c:279
next_gain
static double next_gain(AVFilterContext *ctx, double pi_max_peak, int bypass, double state)
Definition: af_speechnorm.c:163
SpeechNormalizerContext
Definition: af_speechnorm.c:62
internal.h
AVFrame::nb_samples
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:397
ff_bufqueue_peek
static AVFrame * ff_bufqueue_peek(struct FFBufQueue *queue, unsigned index)
Get a buffer from the queue without altering it.
Definition: bufferqueue.h:87
FFBufQueue
Structure holding the queue.
Definition: bufferqueue.h:49
invert
static void invert(float *h, int n)
Definition: asrc_sinc.c:195
AVFrame::extended_data
uint8_t ** extended_data
pointers to the data planes/channels.
Definition: frame.h:378
FLAGS
#define FLAGS
Definition: af_speechnorm.c:91
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
SpeechNormalizerContext::invert
int invert
Definition: af_speechnorm.c:72
SpeechNormalizerContext::queue
struct FFBufQueue queue
Definition: af_speechnorm.c:82
AVFilterPad::name
const char * name
Pad name.
Definition: internal.h:56
ff_inlink_queued_samples
int ff_inlink_queued_samples(AVFilterLink *link)
Definition: avfilter.c:1396
ANALYZE_CHANNEL
#define ANALYZE_CHANNEL(name, ptype, zero, min_peak)
Definition: af_speechnorm.c:222
ChannelContext::pi_size
int pi_size
Definition: af_speechnorm.c:59
av_calloc
void * av_calloc(size_t nmemb, size_t size)
Definition: mem.c:271
SpeechNormalizerContext::fall_amount
double fall_amount
Definition: af_speechnorm.c:70
activate
static int activate(AVFilterContext *ctx)
Definition: af_speechnorm.c:441
AVFilter
Filter definition.
Definition: avfilter.h:165
ret
ret
Definition: filter_design.txt:187
PeriodItem::max_peak
double max_peak
Definition: af_speechnorm.c:48
MIN_PEAK
#define MIN_PEAK
Definition: af_speechnorm.c:43
ChannelContext::bypass
int bypass
Definition: af_speechnorm.c:53
uninit
static av_cold void uninit(AVFilterContext *ctx)
Definition: af_speechnorm.c:534
config_input
static int config_input(AVFilterLink *inlink)
Definition: af_speechnorm.c:481
channel_layout.h
next_pi
static void next_pi(AVFilterContext *ctx, ChannelContext *cc, int bypass)
Definition: af_speechnorm.c:179
avfilter.h
ChannelContext::pi_start
int pi_start
Definition: af_speechnorm.c:57
AV_SAMPLE_FMT_DBLP
@ AV_SAMPLE_FMT_DBLP
double, planar
Definition: samplefmt.h:70
ChannelContext
Definition: hcadec.c:30
ChannelContext::state
int state
Definition: af_speechnorm.c:52
SpeechNormalizerContext::max_compression
double max_compression
Definition: af_speechnorm.c:67
ChannelContext::pi
PeriodItem pi[MAX_ITEMS]
Definition: af_speechnorm.c:54
AVFilterContext
An instance of a filter.
Definition: avfilter.h:402
filter_frame
static int filter_frame(AVFilterContext *ctx)
Definition: af_speechnorm.c:378
FILTER_LINK_CHANNELS
#define FILTER_LINK_CHANNELS(name, ptype, tlerp)
Definition: af_speechnorm.c:322
flerp
static float flerp(float min, float max, float mix)
Definition: af_speechnorm.c:317
audio.h
SpeechNormalizerContext::channels
uint64_t channels
Definition: af_speechnorm.c:71
AV_OPT_TYPE_BOOL
@ AV_OPT_TYPE_BOOL
Definition: opt.h:241
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: internal.h:192
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:35
AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL
#define AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL
Same as AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, except that the filter will have its filter_frame() c...
Definition: avfilter.h:154
OFFSET
#define OFFSET(x)
Definition: af_speechnorm.c:90
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:561
SpeechNormalizerContext::pts
int64_t pts
Definition: af_speechnorm.c:80
SpeechNormalizerContext::max_expansion
double max_expansion
Definition: af_speechnorm.c:66
speechnorm_options
static const AVOption speechnorm_options[]
Definition: af_speechnorm.c:93
SpeechNormalizerContext::analyze_channel
void(* analyze_channel)(AVFilterContext *ctx, ChannelContext *cc, const uint8_t *srcp, int nb_samples)
Definition: af_speechnorm.c:84
ff_af_speechnorm
const AVFilter ff_af_speechnorm
Definition: af_speechnorm.c:557
FILTER_SAMPLEFMTS
#define FILTER_SAMPLEFMTS(...)
Definition: internal.h:179
ff_filter_set_ready
void ff_filter_set_ready(AVFilterContext *filter, unsigned priority)
Mark a filter ready and schedule it for activation.
Definition: avfilter.c:211
min
float min
Definition: vorbis_enc_data.h:429
SpeechNormalizerContext::prev_gain
double prev_gain
Definition: af_speechnorm.c:76