FFmpeg
Data Structures | Macros | Functions | Variables
af_speechnorm.c File Reference
#include <float.h>
#include "libavutil/avassert.h"
#include "libavutil/opt.h"
#include "bufferqueue.h"
#include "audio.h"
#include "avfilter.h"
#include "filters.h"
#include "internal.h"

Go to the source code of this file.

Data Structures

struct  PeriodItem
 
struct  ChannelContext
 
struct  SpeechNormalizerContext
 

Macros

#define FF_BUFQUEUE_SIZE   (1024)
 
#define MAX_ITEMS   882000
 
#define MIN_PEAK   (1. / 32768.)
 
#define OFFSET(x)   offsetof(SpeechNormalizerContext, x)
 
#define FLAGS   AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
 
#define ANALYZE_CHANNEL(name, ptype, zero)
 
#define FILTER_CHANNELS(name, ptype)
 
#define FILTER_LINK_CHANNELS(name, ptype)
 

Functions

 AVFILTER_DEFINE_CLASS (speechnorm)
 
static int query_formats (AVFilterContext *ctx)
 
static int get_pi_samples (PeriodItem *pi, int start, int end, int remain)
 
static int available_samples (AVFilterContext *ctx)
 
static void consume_pi (ChannelContext *cc, int nb_samples)
 
static double next_gain (AVFilterContext *ctx, double pi_max_peak, int bypass, double state)
 
static void next_pi (AVFilterContext *ctx, ChannelContext *cc, int bypass)
 
static double min_gain (AVFilterContext *ctx, ChannelContext *cc, int max_size)
 
static double lerp (double min, double max, double mix)
 
static int filter_frame (AVFilterContext *ctx)
 
static int activate (AVFilterContext *ctx)
 
static int config_input (AVFilterLink *inlink)
 
static int process_command (AVFilterContext *ctx, const char *cmd, const char *args, char *res, int res_len, int flags)
 
static av_cold void uninit (AVFilterContext *ctx)
 

Variables

static const AVOption speechnorm_options []
 
static const AVFilterPad inputs []
 
static const AVFilterPad outputs []
 
AVFilter ff_af_speechnorm
 

Detailed Description

Speech Normalizer

Definition in file af_speechnorm.c.

Macro Definition Documentation

◆ FF_BUFQUEUE_SIZE

#define FF_BUFQUEUE_SIZE   (1024)

Definition at line 33 of file af_speechnorm.c.

◆ MAX_ITEMS

#define MAX_ITEMS   882000

Definition at line 41 of file af_speechnorm.c.

◆ MIN_PEAK

#define MIN_PEAK   (1. / 32768.)

Definition at line 42 of file af_speechnorm.c.

◆ OFFSET

#define OFFSET (   x)    offsetof(SpeechNormalizerContext, x)

Definition at line 89 of file af_speechnorm.c.

◆ FLAGS

#define FLAGS   AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM

Definition at line 90 of file af_speechnorm.c.

◆ ANALYZE_CHANNEL

#define ANALYZE_CHANNEL (   name,
  ptype,
  zero 
)

Definition at line 251 of file af_speechnorm.c.

◆ FILTER_CHANNELS

#define FILTER_CHANNELS (   name,
  ptype 
)
Value:
static void filter_channels_## name (AVFilterContext *ctx, \
AVFrame *in, int nb_samples) \
{ \
SpeechNormalizerContext *s = ctx->priv; \
AVFilterLink *inlink = ctx->inputs[0]; \
for (int ch = 0; ch < inlink->channels; ch++) { \
ChannelContext *cc = &s->cc[ch]; \
ptype *dst = (ptype *)in->extended_data[ch]; \
const int bypass = !(av_channel_layout_extract_channel(inlink->channel_layout, ch) & s->channels); \
int n = 0; \
\
while (n < nb_samples) { \
ptype gain; \
int size; \
next_pi(ctx, cc, bypass); \
size = FFMIN(nb_samples - n, cc->pi_size); \
av_assert0(size > 0); \
gain = cc->gain_state; \
consume_pi(cc, size); \
for (int i = n; i < n + size; i++) \
dst[i] *= gain; \
n += size; \
} \
} \
}

Definition at line 308 of file af_speechnorm.c.

◆ FILTER_LINK_CHANNELS

#define FILTER_LINK_CHANNELS (   name,
  ptype 
)

Definition at line 345 of file af_speechnorm.c.

Function Documentation

◆ AVFILTER_DEFINE_CLASS()

AVFILTER_DEFINE_CLASS ( speechnorm  )

◆ query_formats()

static int query_formats ( AVFilterContext *  ctx)
static

Definition at line 116 of file af_speechnorm.c.

◆ get_pi_samples()

static int get_pi_samples ( PeriodItem *  pi,
int  start,
int  end,
int  remain 
)
static

Definition at line 146 of file af_speechnorm.c.

Referenced by available_samples().

◆ available_samples()

static int available_samples ( AVFilterContext *  ctx)
static

Definition at line 167 of file af_speechnorm.c.

Referenced by activate(), and filter_frame().

◆ consume_pi()

static void consume_pi ( ChannelContext *  cc,
int  nb_samples 
)
static

Definition at line 183 of file af_speechnorm.c.

◆ next_gain()

static double next_gain ( AVFilterContext *  ctx,
double  pi_max_peak,
int  bypass,
double  state 
)
static

Definition at line 192 of file af_speechnorm.c.

Referenced by min_gain(), and next_pi().

◆ next_pi()

static void next_pi ( AVFilterContext *  ctx,
ChannelContext *  cc,
int  bypass 
)
static

Definition at line 208 of file af_speechnorm.c.

◆ min_gain()

static double min_gain ( AVFilterContext *  ctx,
ChannelContext *  cc,
int  max_size 
)
static

Definition at line 228 of file af_speechnorm.c.

◆ lerp()

static double lerp ( double  min,
double  max,
double  mix 
)
static

Definition at line 340 of file af_speechnorm.c.

◆ filter_frame()

static int filter_frame ( AVFilterContext *  ctx)
static

Definition at line 399 of file af_speechnorm.c.

Referenced by activate().

◆ activate()

static int activate ( AVFilterContext *  ctx)
static

Definition at line 450 of file af_speechnorm.c.

◆ config_input()

static int config_input ( AVFilterLink *  inlink)
static

Definition at line 490 of file af_speechnorm.c.

◆ process_command()

static int process_command ( AVFilterContext *  ctx,
const char *  cmd,
const char *  args,
char *  res,
int  res_len,
int  flags 
)
static

Definition at line 527 of file af_speechnorm.c.

◆ uninit()

static av_cold void uninit ( AVFilterContext *  ctx)
static

Definition at line 543 of file af_speechnorm.c.

Variable Documentation

◆ speechnorm_options

const AVOption speechnorm_options[]
static
Initial value:
= {
{ "peak", "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0.0, 1.0, FLAGS },
{ "p", "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0.0, 1.0, FLAGS },
{ "expansion", "set the max expansion factor", OFFSET(max_expansion), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
{ "e", "set the max expansion factor", OFFSET(max_expansion), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
{ "compression", "set the max compression factor", OFFSET(max_compression), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
{ "c", "set the max compression factor", OFFSET(max_compression), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
{ "threshold", "set the threshold value", OFFSET(threshold_value), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0.0, 1.0, FLAGS },
{ "t", "set the threshold value", OFFSET(threshold_value), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0.0, 1.0, FLAGS },
{ "raise", "set the expansion raising amount", OFFSET(raise_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
{ "r", "set the expansion raising amount", OFFSET(raise_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
{ "fall", "set the compression raising amount", OFFSET(fall_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
{ "f", "set the compression raising amount", OFFSET(fall_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
{ "channels", "set channels to filter", OFFSET(channels), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=-1}, INT64_MIN, INT64_MAX, FLAGS },
{ "h", "set channels to filter", OFFSET(channels), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=-1}, INT64_MIN, INT64_MAX, FLAGS },
{ "invert", "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
{ "i", "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
{ "link", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
{ "l", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
{ NULL }
}

Definition at line 92 of file af_speechnorm.c.

◆ inputs

const AVFilterPad inputs[]
static
Initial value:
= {
{
.name = "default",
.config_props = config_input,
},
{ NULL }
}

Definition at line 551 of file af_speechnorm.c.

◆ outputs

const AVFilterPad outputs[]
static
Initial value:
= {
{
.name = "default",
},
{ NULL }
}

Definition at line 560 of file af_speechnorm.c.

◆ ff_af_speechnorm

AVFilter ff_af_speechnorm
Initial value:
= {
.name = "speechnorm",
.description = NULL_IF_CONFIG_SMALL("Speech Normalizer."),
.query_formats = query_formats,
.priv_size = sizeof(SpeechNormalizerContext),
.priv_class = &speechnorm_class,
}

Definition at line 568 of file af_speechnorm.c.

inputs
static const AVFilterPad inputs[]
Definition: af_speechnorm.c:551
name
it's the only field you need to keep, assuming you have a context. There is some magic you don't need to care about around this; just let it vf default minimum maximum flags. name is the option name
Definition: writing_filters.txt:88
process_command
static int process_command(AVFilterContext *ctx, const char *cmd, const char *args, char *res, int res_len, int flags)
Definition: af_speechnorm.c:527
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:318
s
#define s(width, name)
Definition: cbs_vp9.c:257
AV_OPT_TYPE_DOUBLE
@ AV_OPT_TYPE_DOUBLE
Definition: opt.h:227
AVMEDIA_TYPE_AUDIO
@ AVMEDIA_TYPE_AUDIO
Definition: avutil.h:202
ctx
AVFormatContext * ctx
Definition: movenc.c:48
channels
channels
Definition: aptx.h:33
outputs
static const AVFilterPad outputs[]
Definition: af_speechnorm.c:560
link
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a link
Definition: filter_design.txt:23
NULL
#define NULL
Definition: coverity.c:32
for
for(j=16;j >0;--j)
Definition: h264pred_template.c:469
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:117
size
int size
Definition: twinvq_data.h:10344
AV_OPT_TYPE_CHANNEL_LAYOUT
@ AV_OPT_TYPE_CHANNEL_LAYOUT
Definition: opt.h:241
FFMIN
#define FFMIN(a, b)
Definition: common.h:105
SpeechNormalizerContext
Definition: af_speechnorm.c:61
in
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! 
*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
Definition: audio_convert.c:326
i
int i
Definition: input.c:407
av_channel_layout_extract_channel
uint64_t av_channel_layout_extract_channel(uint64_t channel_layout, int index)
Get the channel with the given index in channel_layout.
Definition: channel_layout.c:271
invert
static void invert(float *h, int n)
Definition: asrc_sinc.c:201
FLAGS
#define FLAGS
Definition: af_speechnorm.c:90
activate
static int activate(AVFilterContext *ctx)
Definition: af_speechnorm.c:450
uninit
static av_cold void uninit(AVFilterContext *ctx)
Definition: af_speechnorm.c:543
config_input
static int config_input(AVFilterLink *inlink)
Definition: af_speechnorm.c:490
next_pi
static void next_pi(AVFilterContext *ctx, ChannelContext *cc, int bypass)
Definition: af_speechnorm.c:208
AVFilterContext
An instance of a filter.
Definition: avfilter.h:341
query_formats
static int query_formats(AVFilterContext *ctx)
Definition: af_speechnorm.c:116
AV_OPT_TYPE_BOOL
@ AV_OPT_TYPE_BOOL
Definition: opt.h:242
OFFSET
#define OFFSET(x)
Definition: af_speechnorm.c:89