FFmpeg
af_speechnorm.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020 Paul B Mahol
3  *
4  * Speech Normalizer
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 /**
24  * @file
25  * Speech Normalizer
26  */
27 
28 #include <float.h>
29 
30 #include "libavutil/avassert.h"
31 #include "libavutil/opt.h"
32 
33 #define FF_BUFQUEUE_SIZE (1024)
34 #include "bufferqueue.h"
35 
36 #include "audio.h"
37 #include "avfilter.h"
38 #include "filters.h"
39 #include "internal.h"
40 
41 #define MAX_ITEMS 882000
42 #define MIN_PEAK (1. / 32768.)
43 
/**
 * One half-period of the waveform (a run of samples on one side of zero).
 */
typedef struct PeriodItem {
    int size;        ///< number of samples in this half-period
    int type;        ///< 0 = still being filled, 1 = complete and usable
    double max_peak; ///< largest absolute sample value seen in this period
} PeriodItem;
49 
50 typedef struct ChannelContext {
51  int state;
52  int bypass;
54  double gain_state;
55  double pi_max_peak;
56  int pi_start;
57  int pi_end;
58  int pi_size;
60 
61 typedef struct SpeechNormalizerContext {
62  const AVClass *class;
63 
64  double peak_value;
65  double max_expansion;
68  double raise_amount;
69  double fall_amount;
70  uint64_t channels;
71  int invert;
72  int link;
73 
75  double prev_gain;
76 
78  int eof;
79  int64_t pts;
80 
81  struct FFBufQueue queue;
82 
84  const uint8_t *srcp, int nb_samples);
86  AVFrame *in, int nb_samples);
88 
89 #define OFFSET(x) offsetof(SpeechNormalizerContext, x)
90 #define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
91 
92 static const AVOption speechnorm_options[] = {
93  { "peak", "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0.0, 1.0, FLAGS },
94  { "p", "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0.0, 1.0, FLAGS },
95  { "expansion", "set the max expansion factor", OFFSET(max_expansion), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
96  { "e", "set the max expansion factor", OFFSET(max_expansion), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
97  { "compression", "set the max compression factor", OFFSET(max_compression), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
98  { "c", "set the max compression factor", OFFSET(max_compression), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
99  { "threshold", "set the threshold value", OFFSET(threshold_value), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0.0, 1.0, FLAGS },
100  { "t", "set the threshold value", OFFSET(threshold_value), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0.0, 1.0, FLAGS },
101  { "raise", "set the expansion raising amount", OFFSET(raise_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
102  { "r", "set the expansion raising amount", OFFSET(raise_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
103  { "fall", "set the compression raising amount", OFFSET(fall_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
104  { "f", "set the compression raising amount", OFFSET(fall_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
105  { "channels", "set channels to filter", OFFSET(channels), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=-1}, INT64_MIN, INT64_MAX, FLAGS },
106  { "h", "set channels to filter", OFFSET(channels), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=-1}, INT64_MIN, INT64_MAX, FLAGS },
107  { "invert", "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
108  { "i", "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
109  { "link", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
110  { "l", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
111  { NULL }
112 };
113 
114 AVFILTER_DEFINE_CLASS(speechnorm);
115 
117 {
120  static const enum AVSampleFormat sample_fmts[] = {
123  };
124  int ret;
125 
127  if (!layouts)
128  return AVERROR(ENOMEM);
130  if (ret < 0)
131  return ret;
132 
134  if (!formats)
135  return AVERROR(ENOMEM);
137  if (ret < 0)
138  return ret;
139 
141  if (!formats)
142  return AVERROR(ENOMEM);
144 }
145 
146 static int get_pi_samples(PeriodItem *pi, int start, int end, int remain)
147 {
148  int sum;
149 
150  if (pi[start].type == 0)
151  return remain;
152 
153  sum = remain;
154  while (start != end) {
155  start++;
156  if (start >= MAX_ITEMS)
157  start = 0;
158  if (pi[start].type == 0)
159  break;
160  av_assert0(pi[start].size > 0);
161  sum += pi[start].size;
162  }
163 
164  return sum;
165 }
166 
168 {
169  SpeechNormalizerContext *s = ctx->priv;
170  AVFilterLink *inlink = ctx->inputs[0];
171  int min_pi_nb_samples;
172 
173  min_pi_nb_samples = get_pi_samples(s->cc[0].pi, s->cc[0].pi_start, s->cc[0].pi_end, s->cc[0].pi_size);
174  for (int ch = 1; ch < inlink->channels && min_pi_nb_samples > 0; ch++) {
175  ChannelContext *cc = &s->cc[ch];
176 
177  min_pi_nb_samples = FFMIN(min_pi_nb_samples, get_pi_samples(cc->pi, cc->pi_start, cc->pi_end, cc->pi_size));
178  }
179 
180  return min_pi_nb_samples;
181 }
182 
183 static void consume_pi(ChannelContext *cc, int nb_samples)
184 {
185  if (cc->pi_size >= nb_samples) {
186  cc->pi_size -= nb_samples;
187  } else {
188  av_assert0(0);
189  }
190 }
191 
192 static double next_gain(AVFilterContext *ctx, double pi_max_peak, int bypass, double state)
193 {
194  SpeechNormalizerContext *s = ctx->priv;
195  const double expansion = FFMIN(s->max_expansion, s->peak_value / pi_max_peak);
196  const double compression = 1. / s->max_compression;
197  const int type = s->invert ? pi_max_peak <= s->threshold_value : pi_max_peak >= s->threshold_value;
198 
199  if (bypass) {
200  return 1.;
201  } else if (type) {
202  return FFMIN(expansion, state + s->raise_amount);
203  } else {
204  return FFMIN(expansion, FFMAX(compression, state - s->fall_amount));
205  }
206 }
207 
208 static void next_pi(AVFilterContext *ctx, ChannelContext *cc, int bypass)
209 {
210  av_assert0(cc->pi_size >= 0);
211  if (cc->pi_size == 0) {
212  SpeechNormalizerContext *s = ctx->priv;
213  int start = cc->pi_start;
214 
215  av_assert0(cc->pi[start].size > 0);
216  av_assert0(cc->pi[start].type > 0 || s->eof);
217  cc->pi_size = cc->pi[start].size;
218  cc->pi_max_peak = cc->pi[start].max_peak;
219  av_assert0(cc->pi_start != cc->pi_end || s->eof);
220  start++;
221  if (start >= MAX_ITEMS)
222  start = 0;
223  cc->pi_start = start;
224  cc->gain_state = next_gain(ctx, cc->pi_max_peak, bypass, cc->gain_state);
225  }
226 }
227 
228 static double min_gain(AVFilterContext *ctx, ChannelContext *cc, int max_size)
229 {
230  SpeechNormalizerContext *s = ctx->priv;
231  double min_gain = s->max_expansion;
232  double gain_state = cc->gain_state;
233  int size = cc->pi_size;
234  int idx = cc->pi_start;
235 
236  min_gain = FFMIN(min_gain, gain_state);
237  while (size <= max_size) {
238  if (idx == cc->pi_end)
239  break;
240  gain_state = next_gain(ctx, cc->pi[idx].max_peak, 0, gain_state);
241  min_gain = FFMIN(min_gain, gain_state);
242  size += cc->pi[idx].size;
243  idx++;
244  if (idx >= MAX_ITEMS)
245  idx = 0;
246  }
247 
248  return min_gain;
249 }
250 
251 #define ANALYZE_CHANNEL(name, ptype, zero) \
252 static void analyze_channel_## name (AVFilterContext *ctx, ChannelContext *cc, \
253  const uint8_t *srcp, int nb_samples) \
254 { \
255  SpeechNormalizerContext *s = ctx->priv; \
256  const ptype *src = (const ptype *)srcp; \
257  int n = 0; \
258  \
259  if (cc->state < 0) \
260  cc->state = src[0] >= zero; \
261  \
262  while (n < nb_samples) { \
263  if ((cc->state != (src[n] >= zero)) || \
264  (cc->pi[cc->pi_end].size > s->max_period)) { \
265  double max_peak = cc->pi[cc->pi_end].max_peak; \
266  int state = cc->state; \
267  cc->state = src[n] >= zero; \
268  av_assert0(cc->pi[cc->pi_end].size > 0); \
269  if (cc->pi[cc->pi_end].max_peak >= MIN_PEAK || \
270  cc->pi[cc->pi_end].size > s->max_period) { \
271  cc->pi[cc->pi_end].type = 1; \
272  cc->pi_end++; \
273  if (cc->pi_end >= MAX_ITEMS) \
274  cc->pi_end = 0; \
275  if (cc->state != state) \
276  cc->pi[cc->pi_end].max_peak = DBL_MIN; \
277  else \
278  cc->pi[cc->pi_end].max_peak = max_peak; \
279  cc->pi[cc->pi_end].type = 0; \
280  cc->pi[cc->pi_end].size = 0; \
281  av_assert0(cc->pi_end != cc->pi_start); \
282  } \
283  } \
284  \
285  if (cc->state) { \
286  while (src[n] >= zero) { \
287  cc->pi[cc->pi_end].max_peak = FFMAX(cc->pi[cc->pi_end].max_peak, src[n]); \
288  cc->pi[cc->pi_end].size++; \
289  n++; \
290  if (n >= nb_samples) \
291  break; \
292  } \
293  } else { \
294  while (src[n] < zero) { \
295  cc->pi[cc->pi_end].max_peak = FFMAX(cc->pi[cc->pi_end].max_peak, -src[n]); \
296  cc->pi[cc->pi_end].size++; \
297  n++; \
298  if (n >= nb_samples) \
299  break; \
300  } \
301  } \
302  } \
303 }
304 
305 ANALYZE_CHANNEL(dbl, double, 0.0)
306 ANALYZE_CHANNEL(flt, float, 0.f)
307 
308 #define FILTER_CHANNELS(name, ptype) \
309 static void filter_channels_## name (AVFilterContext *ctx, \
310  AVFrame *in, int nb_samples) \
311 { \
312  SpeechNormalizerContext *s = ctx->priv; \
313  AVFilterLink *inlink = ctx->inputs[0]; \
314  \
315  for (int ch = 0; ch < inlink->channels; ch++) { \
316  ChannelContext *cc = &s->cc[ch]; \
317  ptype *dst = (ptype *)in->extended_data[ch]; \
318  const int bypass = !(av_channel_layout_extract_channel(inlink->channel_layout, ch) & s->channels); \
319  int n = 0; \
320  \
321  while (n < nb_samples) { \
322  ptype gain; \
323  int size; \
324  \
325  next_pi(ctx, cc, bypass); \
326  size = FFMIN(nb_samples - n, cc->pi_size); \
327  av_assert0(size > 0); \
328  gain = cc->gain_state; \
329  consume_pi(cc, size); \
330  for (int i = n; i < n + size; i++) \
331  dst[i] *= gain; \
332  n += size; \
333  } \
334  } \
335 }
336 
337 FILTER_CHANNELS(dbl, double)
338 FILTER_CHANNELS(flt, float)
339 
/* Linear interpolation: returns min for mix == 0, max for mix == 1. */
static double lerp(double min, double max, double mix)
{
    const double span = max - min;

    return min + span * mix;
}
344 
345 #define FILTER_LINK_CHANNELS(name, ptype) \
346 static void filter_link_channels_## name (AVFilterContext *ctx, \
347  AVFrame *in, int nb_samples) \
348 { \
349  SpeechNormalizerContext *s = ctx->priv; \
350  AVFilterLink *inlink = ctx->inputs[0]; \
351  int n = 0; \
352  \
353  while (n < nb_samples) { \
354  int min_size = nb_samples - n; \
355  int max_size = 1; \
356  ptype gain = s->max_expansion; \
357  \
358  for (int ch = 0; ch < inlink->channels; ch++) { \
359  ChannelContext *cc = &s->cc[ch]; \
360  \
361  cc->bypass = !(av_channel_layout_extract_channel(inlink->channel_layout, ch) & s->channels); \
362  \
363  next_pi(ctx, cc, cc->bypass); \
364  min_size = FFMIN(min_size, cc->pi_size); \
365  max_size = FFMAX(max_size, cc->pi_size); \
366  } \
367  \
368  av_assert0(min_size > 0); \
369  for (int ch = 0; ch < inlink->channels; ch++) { \
370  ChannelContext *cc = &s->cc[ch]; \
371  \
372  if (cc->bypass) \
373  continue; \
374  gain = FFMIN(gain, min_gain(ctx, cc, max_size)); \
375  } \
376  \
377  for (int ch = 0; ch < inlink->channels; ch++) { \
378  ChannelContext *cc = &s->cc[ch]; \
379  ptype *dst = (ptype *)in->extended_data[ch]; \
380  \
381  consume_pi(cc, min_size); \
382  if (cc->bypass) \
383  continue; \
384  \
385  for (int i = n; i < n + min_size; i++) { \
386  ptype g = lerp(s->prev_gain, gain, (i - n) / (double)min_size); \
387  dst[i] *= g; \
388  } \
389  } \
390  \
391  s->prev_gain = gain; \
392  n += min_size; \
393  } \
394 }
395 
396 FILTER_LINK_CHANNELS(dbl, double)
397 FILTER_LINK_CHANNELS(flt, float)
398 
400 {
401  SpeechNormalizerContext *s = ctx->priv;
402  AVFilterLink *outlink = ctx->outputs[0];
403  AVFilterLink *inlink = ctx->inputs[0];
404  int ret;
405 
406  while (s->queue.available > 0) {
407  int min_pi_nb_samples;
408  AVFrame *in;
409 
410  in = ff_bufqueue_peek(&s->queue, 0);
411  if (!in)
412  break;
413 
414  min_pi_nb_samples = available_samples(ctx);
415  if (min_pi_nb_samples < in->nb_samples && !s->eof)
416  break;
417 
418  in = ff_bufqueue_get(&s->queue);
419 
421 
422  s->filter_channels[s->link](ctx, in, in->nb_samples);
423 
424  s->pts = in->pts + in->nb_samples;
425 
426  return ff_filter_frame(outlink, in);
427  }
428 
429  for (int f = 0; f < ff_inlink_queued_frames(inlink); f++) {
430  AVFrame *in;
431 
433  if (ret < 0)
434  return ret;
435  if (ret == 0)
436  break;
437 
438  ff_bufqueue_add(ctx, &s->queue, in);
439 
440  for (int ch = 0; ch < inlink->channels; ch++) {
441  ChannelContext *cc = &s->cc[ch];
442 
443  s->analyze_channel(ctx, cc, in->extended_data[ch], in->nb_samples);
444  }
445  }
446 
447  return 1;
448 }
449 
451 {
452  AVFilterLink *inlink = ctx->inputs[0];
453  AVFilterLink *outlink = ctx->outputs[0];
454  SpeechNormalizerContext *s = ctx->priv;
455  int ret, status;
456  int64_t pts;
457 
459 
460  ret = filter_frame(ctx);
461  if (ret <= 0)
462  return ret;
463 
464  if (!s->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {
465  if (status == AVERROR_EOF)
466  s->eof = 1;
467  }
468 
469  if (s->eof && ff_inlink_queued_samples(inlink) == 0 &&
470  s->queue.available == 0) {
471  ff_outlink_set_status(outlink, AVERROR_EOF, s->pts);
472  return 0;
473  }
474 
475  if (s->queue.available > 0) {
476  AVFrame *in = ff_bufqueue_peek(&s->queue, 0);
477  const int nb_samples = available_samples(ctx);
478 
479  if (nb_samples >= in->nb_samples || s->eof) {
481  return 0;
482  }
483  }
484 
486 
487  return FFERROR_NOT_READY;
488 }
489 
491 {
492  AVFilterContext *ctx = inlink->dst;
493  SpeechNormalizerContext *s = ctx->priv;
494 
495  s->max_period = inlink->sample_rate / 10;
496 
497  s->prev_gain = 1.;
498  s->cc = av_calloc(inlink->channels, sizeof(*s->cc));
499  if (!s->cc)
500  return AVERROR(ENOMEM);
501 
502  for (int ch = 0; ch < inlink->channels; ch++) {
503  ChannelContext *cc = &s->cc[ch];
504 
505  cc->state = -1;
506  cc->gain_state = 1.;
507  }
508 
509  switch (inlink->format) {
510  case AV_SAMPLE_FMT_FLTP:
511  s->analyze_channel = analyze_channel_flt;
512  s->filter_channels[0] = filter_channels_flt;
513  s->filter_channels[1] = filter_link_channels_flt;
514  break;
515  case AV_SAMPLE_FMT_DBLP:
516  s->analyze_channel = analyze_channel_dbl;
517  s->filter_channels[0] = filter_channels_dbl;
518  s->filter_channels[1] = filter_link_channels_dbl;
519  break;
520  default:
521  av_assert0(0);
522  }
523 
524  return 0;
525 }
526 
527 static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
528  char *res, int res_len, int flags)
529 {
530  SpeechNormalizerContext *s = ctx->priv;
531  int link = s->link;
532  int ret;
533 
534  ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags);
535  if (ret < 0)
536  return ret;
537  if (link != s->link)
538  s->prev_gain = 1.;
539 
540  return 0;
541 }
542 
544 {
545  SpeechNormalizerContext *s = ctx->priv;
546 
547  ff_bufqueue_discard_all(&s->queue);
548  av_freep(&s->cc);
549 }
550 
551 static const AVFilterPad inputs[] = {
552  {
553  .name = "default",
554  .type = AVMEDIA_TYPE_AUDIO,
555  .config_props = config_input,
556  },
557  { NULL }
558 };
559 
560 static const AVFilterPad outputs[] = {
561  {
562  .name = "default",
563  .type = AVMEDIA_TYPE_AUDIO,
564  },
565  { NULL }
566 };
567 
569  .name = "speechnorm",
570  .description = NULL_IF_CONFIG_SMALL("Speech Normalizer."),
571  .query_formats = query_formats,
572  .priv_size = sizeof(SpeechNormalizerContext),
573  .priv_class = &speechnorm_class,
574  .activate = activate,
575  .uninit = uninit,
576  .inputs = inputs,
577  .outputs = outputs,
579 };
formats
formats
Definition: signature.h:48
inputs
static const AVFilterPad inputs[]
Definition: af_speechnorm.c:551
AV_SAMPLE_FMT_FLTP
@ AV_SAMPLE_FMT_FLTP
float, planar
Definition: samplefmt.h:69
AVFilterChannelLayouts
A list of supported channel layouts.
Definition: formats.h:86
get_pi_samples
static int get_pi_samples(PeriodItem *pi, int start, int end, int remain)
Definition: af_speechnorm.c:146
status
they must not be accessed directly The fifo field contains the frames that are queued in the input for processing by the filter The status_in and status_out fields contains the queued status(EOF or error) of the link
mix
static int mix(int c0, int c1)
Definition: 4xm.c:715
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
ff_make_format_list
AVFilterFormats * ff_make_format_list(const int *fmts)
Create a list of supported formats.
Definition: formats.c:286
consume_pi
static void consume_pi(ChannelContext *cc, int nb_samples)
Definition: af_speechnorm.c:183
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1096
sample_fmts
static enum AVSampleFormat sample_fmts[]
Definition: adpcmenc.c:925
PeriodItem::type
int type
Definition: af_speechnorm.c:46
layouts
enum MovChannelLayoutTag * layouts
Definition: mov_chan.c:434
AVERROR_EOF
#define AVERROR_EOF
End of file.
Definition: error.h:55
FFERROR_NOT_READY
return FFERROR_NOT_READY
Definition: filter_design.txt:204
process_command
static int process_command(AVFilterContext *ctx, const char *cmd, const char *args, char *res, int res_len, int flags)
Definition: af_speechnorm.c:527
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
state
static struct @321 state
ff_all_channel_counts
AVFilterChannelLayouts * ff_all_channel_counts(void)
Construct an AVFilterChannelLayouts coding for any channel layout, with known or unknown disposition.
Definition: formats.c:436
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:318
av_frame_make_writable
int av_frame_make_writable(AVFrame *frame)
Ensure that the frame data is writable, avoiding data copy if possible.
Definition: frame.c:611
AVOption
AVOption.
Definition: opt.h:248
PeriodItem
Definition: af_speechnorm.c:44
SpeechNormalizerContext::peak_value
double peak_value
Definition: af_speechnorm.c:64
float.h
max
#define max(a, b)
Definition: cuda_runtime.h:33
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:149
ChannelContext::gain_state
double gain_state
Definition: af_speechnorm.c:54
FF_FILTER_FORWARD_STATUS_BACK
#define FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink)
Forward the status on an output link to an input link.
Definition: filters.h:199
SpeechNormalizerContext::link
int link
Definition: af_speechnorm.c:72
MAX_ITEMS
#define MAX_ITEMS
Definition: af_speechnorm.c:41
ff_bufqueue_get
static AVFrame * ff_bufqueue_get(struct FFBufQueue *queue)
Get the first buffer from the queue and remove it.
Definition: bufferqueue.h:98
SpeechNormalizerContext::threshold_value
double threshold_value
Definition: af_speechnorm.c:67
AVFilterFormats
A list of supported formats for one end of a filter link.
Definition: formats.h:65
ff_inlink_consume_frame
int ff_inlink_consume_frame(AVFilterLink *link, AVFrame **rframe)
Take a frame from the link's FIFO and update the link's stats.
Definition: avfilter.c:1494
ff_af_speechnorm
AVFilter ff_af_speechnorm
Definition: af_speechnorm.c:568
type
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf type
Definition: writing_filters.txt:86
pts
static int64_t pts
Definition: transcode_aac.c:652
AVFILTER_DEFINE_CLASS
AVFILTER_DEFINE_CLASS(speechnorm)
AVFilterPad
A filter pad used for either input or output.
Definition: internal.h:54
avassert.h
av_cold
#define av_cold
Definition: attributes.h:90
ff_set_common_formats
int ff_set_common_formats(AVFilterContext *ctx, AVFilterFormats *formats)
A helper for query_formats() which sets all links to the same list of formats.
Definition: formats.c:587
SpeechNormalizerContext::eof
int eof
Definition: af_speechnorm.c:78
ff_outlink_set_status
static void ff_outlink_set_status(AVFilterLink *link, int status, int64_t pts)
Set the status field of a link from the source filter.
Definition: filters.h:189
s
#define s(width, name)
Definition: cbs_vp9.c:257
available_samples
static int available_samples(AVFilterContext *ctx)
Definition: af_speechnorm.c:167
SpeechNormalizerContext::raise_amount
double raise_amount
Definition: af_speechnorm.c:68
AV_OPT_TYPE_DOUBLE
@ AV_OPT_TYPE_DOUBLE
Definition: opt.h:227
AVMEDIA_TYPE_AUDIO
@ AVMEDIA_TYPE_AUDIO
Definition: avutil.h:202
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
filters.h
ctx
AVFormatContext * ctx
Definition: movenc.c:48
channels
channels
Definition: aptx.h:33
SpeechNormalizerContext::cc
ChannelContext * cc
Definition: af_speechnorm.c:74
outputs
static const AVFilterPad outputs[]
Definition: af_speechnorm.c:560
f
#define f(width, name)
Definition: cbs_vp9.c:255
link
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a link
Definition: filter_design.txt:23
FILTER_LINK_CHANNELS
#define FILTER_LINK_CHANNELS(name, ptype)
Definition: af_speechnorm.c:345
min_gain
static double min_gain(AVFilterContext *ctx, ChannelContext *cc, int max_size)
Definition: af_speechnorm.c:228
AVClass
Describe the class of an AVClass context structure.
Definition: log.h:67
NULL
#define NULL
Definition: coverity.c:32
ANALYZE_CHANNEL
#define ANALYZE_CHANNEL(name, ptype, zero)
Definition: af_speechnorm.c:251
ff_bufqueue_discard_all
static void ff_bufqueue_discard_all(struct FFBufQueue *queue)
Unref and remove all buffers from the queue.
Definition: bufferqueue.h:111
ChannelContext::pi_max_peak
double pi_max_peak
Definition: af_speechnorm.c:55
PeriodItem::size
int size
Definition: af_speechnorm.c:45
ChannelContext::pi_end
int pi_end
Definition: af_speechnorm.c:57
ff_inlink_acknowledge_status
int ff_inlink_acknowledge_status(AVFilterLink *link, int *rstatus, int64_t *rpts)
Test and acknowledge the change of status on the link.
Definition: avfilter.c:1449
ff_inlink_queued_frames
size_t ff_inlink_queued_frames(AVFilterLink *link)
Get the number of frames available on the link.
Definition: avfilter.c:1464
bufferqueue.h
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:117
FFMAX
#define FFMAX(a, b)
Definition: common.h:103
AV_SAMPLE_FMT_NONE
@ AV_SAMPLE_FMT_NONE
Definition: samplefmt.h:59
size
int size
Definition: twinvq_data.h:10344
SpeechNormalizerContext::max_period
int max_period
Definition: af_speechnorm.c:77
AV_OPT_TYPE_CHANNEL_LAYOUT
@ AV_OPT_TYPE_CHANNEL_LAYOUT
Definition: opt.h:241
ff_filter_process_command
int ff_filter_process_command(AVFilterContext *ctx, const char *cmd, const char *arg, char *res, int res_len, int flags)
Generic processing of user supplied commands that are set in the same way as the filter options.
Definition: avfilter.c:882
FFMIN
#define FFMIN(a, b)
Definition: common.h:105
FF_FILTER_FORWARD_WANTED
FF_FILTER_FORWARD_WANTED(outlink, inlink)
ff_bufqueue_add
static void ff_bufqueue_add(void *log, struct FFBufQueue *queue, AVFrame *buf)
Add a buffer to the queue.
Definition: bufferqueue.h:71
FILTER_CHANNELS
#define FILTER_CHANNELS(name, ptype)
Definition: af_speechnorm.c:308
next_gain
static double next_gain(AVFilterContext *ctx, double pi_max_peak, int bypass, double state)
Definition: af_speechnorm.c:192
SpeechNormalizerContext
Definition: af_speechnorm.c:61
internal.h
in
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! 
*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
Definition: audio_convert.c:326
ff_bufqueue_peek
static AVFrame * ff_bufqueue_peek(struct FFBufQueue *queue, unsigned index)
Get a buffer from the queue without altering it.
Definition: bufferqueue.h:87
FFBufQueue
Structure holding the queue.
Definition: bufferqueue.h:49
invert
static void invert(float *h, int n)
Definition: asrc_sinc.c:201
AVSampleFormat
AVSampleFormat
Audio sample formats.
Definition: samplefmt.h:58
FLAGS
#define FLAGS
Definition: af_speechnorm.c:90
uint8_t
uint8_t
Definition: audio_convert.c:194
SpeechNormalizerContext::invert
int invert
Definition: af_speechnorm.c:71
SpeechNormalizerContext::filter_channels
void(* filter_channels[2])(AVFilterContext *ctx, AVFrame *in, int nb_samples)
Definition: af_speechnorm.c:85
SpeechNormalizerContext::queue
struct FFBufQueue queue
Definition: af_speechnorm.c:81
AVFilterPad::name
const char * name
Pad name.
Definition: internal.h:60
ff_inlink_queued_samples
int ff_inlink_queued_samples(AVFilterLink *link)
Definition: avfilter.c:1474
ChannelContext::pi_size
int pi_size
Definition: af_speechnorm.c:58
SpeechNormalizerContext::fall_amount
double fall_amount
Definition: af_speechnorm.c:69
activate
static int activate(AVFilterContext *ctx)
Definition: af_speechnorm.c:450
AVFilter
Filter definition.
Definition: avfilter.h:145
ret
ret
Definition: filter_design.txt:187
PeriodItem::max_peak
double max_peak
Definition: af_speechnorm.c:47
ChannelContext::bypass
int bypass
Definition: af_speechnorm.c:52
uninit
static av_cold void uninit(AVFilterContext *ctx)
Definition: af_speechnorm.c:543
ff_all_samplerates
AVFilterFormats * ff_all_samplerates(void)
Definition: formats.c:421
config_input
static int config_input(AVFilterLink *inlink)
Definition: af_speechnorm.c:490
av_calloc
void * av_calloc(size_t nmemb, size_t size)
Non-inlined equivalent of av_mallocz_array().
Definition: mem.c:245
next_pi
static void next_pi(AVFilterContext *ctx, ChannelContext *cc, int bypass)
Definition: af_speechnorm.c:208
avfilter.h
ChannelContext::pi_start
int pi_start
Definition: af_speechnorm.c:56
AV_SAMPLE_FMT_DBLP
@ AV_SAMPLE_FMT_DBLP
double, planar
Definition: samplefmt.h:70
ChannelContext
Definition: hcadec.c:30
ChannelContext::state
int state
Definition: af_speechnorm.c:51
SpeechNormalizerContext::max_compression
double max_compression
Definition: af_speechnorm.c:66
ChannelContext::pi
PeriodItem pi[MAX_ITEMS]
Definition: af_speechnorm.c:53
AVFilterContext
An instance of a filter.
Definition: avfilter.h:341
filter_frame
static int filter_frame(AVFilterContext *ctx)
Definition: af_speechnorm.c:399
audio.h
SpeechNormalizerContext::channels
uint64_t channels
Definition: af_speechnorm.c:70
query_formats
static int query_formats(AVFilterContext *ctx)
Definition: af_speechnorm.c:116
lerp
static double lerp(double min, double max, double mix)
Definition: af_speechnorm.c:340
AV_OPT_TYPE_BOOL
@ AV_OPT_TYPE_BOOL
Definition: opt.h:242
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:35
OFFSET
#define OFFSET(x)
Definition: af_speechnorm.c:89
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:561
ff_set_common_samplerates
int ff_set_common_samplerates(AVFilterContext *ctx, AVFilterFormats *samplerates)
Definition: formats.c:575
SpeechNormalizerContext::pts
int64_t pts
Definition: af_speechnorm.c:79
SpeechNormalizerContext::max_expansion
double max_expansion
Definition: af_speechnorm.c:65
speechnorm_options
static const AVOption speechnorm_options[]
Definition: af_speechnorm.c:92
SpeechNormalizerContext::analyze_channel
void(* analyze_channel)(AVFilterContext *ctx, ChannelContext *cc, const uint8_t *srcp, int nb_samples)
Definition: af_speechnorm.c:83
ff_filter_set_ready
void ff_filter_set_ready(AVFilterContext *filter, unsigned priority)
Mark a filter ready and schedule it for activation.
Definition: avfilter.c:193
ff_set_common_channel_layouts
int ff_set_common_channel_layouts(AVFilterContext *ctx, AVFilterChannelLayouts *channel_layouts)
A helper for query_formats() which sets all links to the same list of channel layouts/sample rates.
Definition: formats.c:568
min
float min
Definition: vorbis_enc_data.h:456
SpeechNormalizerContext::prev_gain
double prev_gain
Definition: af_speechnorm.c:75