[FFmpeg-cvslog] avfilter/af_adynamicequalizer: add adaptive detection mode

Fri Nov 17 01:13:50 EET 2023

ffmpeg | branch: master | Paul B Mahol <onemda at gmail.com> | Wed Nov  8 14:31:50 2023 +0100| [08e97dae205d10806a0360bfc62f654d629dda93] | committer: Paul B Mahol

avfilter/af_adynamicequalizer: add adaptive detection mode

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=08e97dae205d10806a0360bfc62f654d629dda93
---

 doc/filters.texi                         |   2 +
 libavfilter/adynamicequalizer_template.c | 120 +++++++++++++++++++++++++++++++
 libavfilter/af_adynamicequalizer.c       |  27 +++++++
 3 files changed, 149 insertions(+)

diff --git a/doc/filters.texi b/doc/filters.texi
index 13c18a2574..d83a3fb91e 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -1100,6 +1100,8 @@ Disable using automatically gathered threshold value.
 Stop picking threshold value.
 @item on
 Start picking threshold value.
+@item adaptive
+Adaptively pick threshold value, by calculating sliding window entropy.
 @end table
 
 @item precision
diff --git a/libavfilter/adynamicequalizer_template.c b/libavfilter/adynamicequalizer_template.c
index 653d51c3a9..4eb2489cd6 100644
--- a/libavfilter/adynamicequalizer_template.c
+++ b/libavfilter/adynamicequalizer_template.c
@@ -27,7 +27,11 @@
 #undef CLIP
 #undef SAMPLE_FORMAT
 #undef FABS
+#undef FLOG
+#undef FEXP
+#undef FLOG2
 #undef FLOG10
+#undef FEXP2
 #undef FEXP10
 #undef EPSILON
 #if DEPTH == 32
@@ -41,7 +45,11 @@
 #define FMAX fmaxf
 #define CLIP av_clipf
 #define FABS fabsf
+#define FLOG logf
+#define FEXP expf
+#define FLOG2 log2f
 #define FLOG10 log10f
+#define FEXP2 exp2f
 #define FEXP10 ff_exp10f
 #define EPSILON (1.f / (1 << 23))
 #define ftype float
@@ -56,7 +64,11 @@
 #define FMAX fmax
 #define CLIP av_clipd
 #define FABS fabs
+#define FLOG log
+#define FEXP exp
+#define FLOG2 log2
 #define FLOG10 log10
+#define FEXP2 exp2
 #define FEXP10 ff_exp10
 #define EPSILON (1.0 / (1LL << 53))
 #define ftype double
@@ -150,6 +162,92 @@ static int fn(filter_prepare)(AVFilterContext *ctx)
     return 0;
 }
 
+#define PEAKS(empty_value,op,sample, psample) /* update circular deque ss[] (front/back indices, wrap limit n, flag empty) for incoming sample and outgoing psample; all five names come from the expanding scope */ \
+    if (!empty && psample == ss[front]) {    /* outgoing sample equals the front entry: remove it */ \
+        ss[front] = empty_value;             \
+        if (back != front) {                 \
+            front--;                         \
+            if (front < 0)                   /* wrap below index 0 */ \
+                front = n - 1;               \
+        }                                    \
+        empty = front == back;               \
+    }                                        \
+                                             \
+    if (!empty && sample op ss[front]) {     /* incoming sample beats the front entry: clear every slot from front to back */ \
+        while (1) {                          \
+            ss[front] = empty_value;         \
+            if (back == front) {             \
+                empty = 1;                   \
+                break;                       \
+            }                                \
+            front--;                         \
+            if (front < 0)                   \
+                front = n - 1;               \
+        }                                    \
+    }                                        \
+                                             \
+    while (!empty && sample op ss[back]) {   /* discard back entries that the incoming sample beats */ \
+        ss[back] = empty_value;              \
+        if (back == front) {                 \
+            empty = 1;                       \
+            break;                           \
+        }                                    \
+        back++;                              \
+        if (back >= n)                       /* wrap past the last index */ \
+            back = 0;                        \
+    }                                        \
+                                             \
+    if (!empty) {                            /* entries remain: move back one slot down (with wrap) so the next store does not overwrite them */ \
+        back--;                              \
+        if (back < 0)                        \
+            back = n - 1;                    \
+    }
+
+static void fn(queue_sample)(ChannelContext *cc, /* feed one sample into cc's sliding-window sums, history buffer and peak deque */
+                             const ftype x,       /* new sample value */
+                             const int nb_samples) /* window length: caps cc->size and wraps cc->position */
+{
+    ftype *ss = cc->dqueue; /* deque storage handled by PEAKS via front/back */
+    ftype *qq = cc->queue;  /* circular history of stored samples */
+    int front = cc->front;
+    int back = cc->back;
+    int empty, n, pos = cc->position;
+    ftype px = qq[pos]; /* value about to be overwritten at the write position */
+
+    fn(cc->sum) += x;
+    fn(cc->log_sum) += FLOG2(x);
+    if (cc->size >= nb_samples) { /* window already full: take the overwritten sample out of both running sums */
+        fn(cc->sum) -= px;
+        fn(cc->log_sum) -= FLOG2(px);
+    }
+
+    qq[pos] = x;
+    pos++;
+    if (pos >= nb_samples) /* wrap the write position */
+        pos = 0;
+    cc->position = pos;
+
+    if (cc->size < nb_samples) /* size counts stored samples, capped at nb_samples */
+        cc->size++;
+    n = cc->size; /* read inside PEAKS as the wrap limit for front/back */
+
+    empty = (front == back) && (ss[front] == ZERO); /* front == back holding ZERO is treated as an empty deque */
+    PEAKS(ZERO, >, x, px)
+
+    ss[back] = x; /* store x in the back slot left by PEAKS */
+
+    cc->front = front;
+    cc->back = back;
+}
+
+static ftype fn(get_peak)(ChannelContext *cc, ftype *score) /* returns the front deque entry and writes a score derived from the running sums to *score */
+{
+    ftype s, *ss = cc->dqueue;
+    s = FEXP2(fn(cc->log_sum) / cc->size) / (fn(cc->sum) / cc->size); /* 2^(mean of log2 values) divided by the plain mean, i.e. geometric mean over arithmetic mean; NOTE(review): no guard for cc->size == 0 */
+    *score = LIN2LOG(s); /* LIN2LOG is defined outside this hunk; presumably a linear-to-log conversion - confirm */
+    return ss[cc->front]; /* entry at the front index of the deque */
+}
+
 static int fn(filter_channels)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
     AudioDynamicEqualizerContext *s = ctx->priv;
@@ -157,6 +255,7 @@ static int fn(filter_channels)(AVFilterContext *ctx, void *arg, int jobnr, int n
     AVFrame *in = td->in;
     AVFrame *out = td->out;
     const ftype sample_rate = in->sample_rate;
+    const int isample_rate = in->sample_rate;
     const ftype makeup = s->makeup;
     const ftype ratio = s->ratio;
     const ftype range = s->range;
@@ -197,6 +296,27 @@ static int fn(filter_channels)(AVFilterContext *ctx, void *arg, int jobnr, int n
 
             fn(cc->new_threshold_log) = FMAX(fn(cc->new_threshold_log), LIN2LOG(new_threshold));
         }
+    } else if (detection == DET_ADAPTIVE) {
+        for (int ch = start; ch < end; ch++) {
+            const ftype *src = (const ftype *)in->extended_data[ch];
+            ChannelContext *cc = &s->cc[ch];
+            ftype *tstate = fn(cc->tstate);
+            ftype score, peak;
+
+            for (int n = 0; n < in->nb_samples; n++) {
+                ftype detect = FMAX(FABS(fn(get_svf)(src[n], dm, da, tstate)), EPSILON);
+                fn(queue_sample)(cc, detect, isample_rate);
+            }
+
+            peak = fn(get_peak)(cc, &score);
+
+            if (score >= -3.5) {
+                fn(cc->threshold_log) = LIN2LOG(peak);
+            } else if (cc->detection == DET_UNSET) {
+                fn(cc->threshold_log) = s->threshold_log;
+            }
+            cc->detection = detection;
+        }
     } else if (detection == DET_DISABLED) {
         for (int ch = start; ch < end; ch++) {
             ChannelContext *cc = &s->cc[ch];
diff --git a/libavfilter/af_adynamicequalizer.c b/libavfilter/af_adynamicequalizer.c
index 1926ae8ec1..611e542c1b 100644
--- a/libavfilter/af_adynamicequalizer.c
+++ b/libavfilter/af_adynamicequalizer.c
@@ -29,6 +29,7 @@ enum DetectionModes {
     DET_DISABLED,
     DET_OFF,
     DET_ON,
+    DET_ADAPTIVE,
     NB_DMODES,
 };
 
@@ -50,6 +51,8 @@ typedef struct ChannelContext {
     double detect_double;
     double threshold_log_double;
     double new_threshold_log_double;
+    double log_sum_double;
+    double sum_double;
     float fa_float[3], fm_float[3];
     float dstate_float[2];
     float fstate_float[2];
@@ -58,6 +61,14 @@ typedef struct ChannelContext {
     float detect_float;
     float threshold_log_float;
     float new_threshold_log_float;
+    float log_sum_float;
+    float sum_float;
+    void *dqueue;
+    void *queue;
+    int position;
+    int size;
+    int front;
+    int back;
     int detection;
     int init;
 } ChannelContext;
@@ -86,6 +97,7 @@ typedef struct AudioDynamicEqualizerContext {
     int dftype;
     int precision;
     int format;
+    int nb_channels;
 
     int (*filter_prepare)(AVFilterContext *ctx);
     int (*filter_channels)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
@@ -140,6 +152,7 @@ static int config_input(AVFilterLink *inlink)
     s->cc = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->cc));
     if (!s->cc)
         return AVERROR(ENOMEM);
+    s->nb_channels = inlink->ch_layout.nb_channels;
 
     switch (s->format) {
     case AV_SAMPLE_FMT_DBLP:
@@ -152,6 +165,14 @@ static int config_input(AVFilterLink *inlink)
         break;
     }
 
+    for (int ch = 0; ch < s->nb_channels; ch++) {
+        ChannelContext *cc = &s->cc[ch];
+        cc->queue = av_calloc(inlink->sample_rate, sizeof(double));
+        cc->dqueue = av_calloc(inlink->sample_rate, sizeof(double));
+        if (!cc->queue || !cc->dqueue)
+            return AVERROR(ENOMEM);
+    }
+
     return 0;
 }
 
@@ -189,6 +210,11 @@ static av_cold void uninit(AVFilterContext *ctx)
 {
     AudioDynamicEqualizerContext *s = ctx->priv;
 
+    for (int ch = 0; ch < s->nb_channels; ch++) {
+        ChannelContext *cc = &s->cc[ch];
+        av_freep(&cc->queue);
+        av_freep(&cc->dqueue);
+    }
     av_freep(&s->cc);
 }
 
@@ -226,6 +252,7 @@ static const AVOption adynamicequalizer_options[] = {
     {   "disabled", 0,                         0,                  AV_OPT_TYPE_CONST,  {.i64=DET_DISABLED}, 0, 0,   FLAGS, "auto" },
     {   "off",      0,                         0,                  AV_OPT_TYPE_CONST,  {.i64=DET_OFF},      0, 0,   FLAGS, "auto" },
     {   "on",       0,                         0,                  AV_OPT_TYPE_CONST,  {.i64=DET_ON},       0, 0,   FLAGS, "auto" },
+    {   "adaptive", 0,                         0,                  AV_OPT_TYPE_CONST,  {.i64=DET_ADAPTIVE}, 0, 0,   FLAGS, "auto" },
     { "precision", "set processing precision", OFFSET(precision),  AV_OPT_TYPE_INT,    {.i64=0},        0, 2,       AF, "precision" },
     {   "auto",  "set auto processing precision",                  0, AV_OPT_TYPE_CONST, {.i64=0},      0, 0,       AF, "precision" },
     {   "float", "set single-floating point processing precision", 0, AV_OPT_TYPE_CONST, {.i64=1},      0, 0,       AF, "precision" },