[FFmpeg-cvslog] avfilter/af_afir: fix IR switching with different lengths

Mon Apr 24 00:41:55 EEST 2023

ffmpeg | branch: master | Paul B Mahol <onemda at gmail.com> | Sun Apr 23 23:30:42 2023 +0200| [ef338cfd7b435703cf56ed5e6527b8f1953f90f1] | committer: Paul B Mahol

avfilter/af_afir: fix IR switching with different lengths

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ef338cfd7b435703cf56ed5e6527b8f1953f90f1
---

 libavfilter/af_afir.c       |  76 +++++++++++++++++------------
 libavfilter/af_afir.h       |   9 ++--
 libavfilter/afir_template.c | 116 +++++++++++---------------------------------
 3 files changed, 79 insertions(+), 122 deletions(-)

diff --git a/libavfilter/af_afir.c b/libavfilter/af_afir.c
index 0fe038972f..fab751cb9f 100644
--- a/libavfilter/af_afir.c
+++ b/libavfilter/af_afir.c
@@ -106,14 +106,20 @@ static int fir_channel(AVFilterContext *ctx, AVFrame *out, int ch)
 {
     AudioFIRContext *s = ctx->priv;
     const int min_part_size = s->min_part_size;
+    const int prev_selir = s->prev_selir;
+    const int selir = s->selir;
 
     for (int offset = 0; offset < out->nb_samples; offset += min_part_size) {
         switch (s->format) {
         case AV_SAMPLE_FMT_FLTP:
-            fir_quantum_float(ctx, out, ch, offset);
+            if (prev_selir != selir)
+                fir_quantum_float(ctx, out, ch, offset, prev_selir);
+            fir_quantum_float(ctx, out, ch, offset, selir);
             break;
         case AV_SAMPLE_FMT_DBLP:
-            fir_quantum_double(ctx, out, ch, offset);
+            if (prev_selir != selir)
+                fir_quantum_double(ctx, out, ch, offset, prev_selir);
+            fir_quantum_double(ctx, out, ch, offset, selir);
             break;
         }
     }
@@ -179,10 +185,9 @@ static int init_segment(AVFilterContext *ctx, AudioFIRSegment *seg, int selir,
     seg->input_size    = offset + s->min_part_size;
     seg->input_offset  = offset;
 
-    seg->loading       = av_calloc(ctx->inputs[0]->ch_layout.nb_channels, sizeof(*seg->loading));
     seg->part_index    = av_calloc(ctx->inputs[0]->ch_layout.nb_channels, sizeof(*seg->part_index));
     seg->output_offset = av_calloc(ctx->inputs[0]->ch_layout.nb_channels, sizeof(*seg->output_offset));
-    if (!seg->part_index || !seg->output_offset || !seg->loading)
+    if (!seg->part_index || !seg->output_offset)
         return AVERROR(ENOMEM);
 
     switch (s->format) {
@@ -253,7 +258,6 @@ static void uninit_segment(AVFilterContext *ctx, AudioFIRSegment *seg)
     }
     av_freep(&seg->itx);
 
-    av_freep(&seg->loading);
     av_freep(&seg->output_offset);
     av_freep(&seg->part_index);
 
@@ -268,7 +272,7 @@ static void uninit_segment(AVFilterContext *ctx, AudioFIRSegment *seg)
     seg->input_size = 0;
 
     for (int i = 0; i < MAX_IR_STREAMS; i++)
-        av_frame_free(&seg->coeff[i]);
+        av_frame_free(&seg->coeff);
 }
 
 static int convert_coeffs(AVFilterContext *ctx, int selir)
@@ -287,7 +291,7 @@ static int convert_coeffs(AVFilterContext *ctx, int selir)
         if (s->minp > s->maxp)
             s->maxp = s->minp;
 
-        if (s->nb_segments)
+        if (s->nb_segments[selir])
             goto skip;
 
         left = s->nb_taps[selir];
@@ -300,8 +304,8 @@ static int convert_coeffs(AVFilterContext *ctx, int selir)
             int step = part_size == max_part_size ? INT_MAX : 1 + (i == 0);
             int nb_partitions = FFMIN(step, (left + part_size - 1) / part_size);
 
-            s->nb_segments = i + 1;
-            ret = init_segment(ctx, &s->seg[i], selir, offset, nb_partitions, part_size, i);
+            s->nb_segments[selir] = i + 1;
+            ret = init_segment(ctx, &s->seg[selir][i], selir, offset, nb_partitions, part_size, i);
             if (ret < 0)
                 return ret;
             offset += nb_partitions * part_size;
@@ -309,6 +313,8 @@ static int convert_coeffs(AVFilterContext *ctx, int selir)
             part_size *= 2;
             part_size = FFMIN(part_size, max_part_size);
         }
+
+        s->max_offset[selir] = offset;
     }
 
 skip:
@@ -342,7 +348,7 @@ skip:
     }
 
     av_log(ctx, AV_LOG_DEBUG, "nb_taps: %d\n", cur_nb_taps);
-    av_log(ctx, AV_LOG_DEBUG, "nb_segments: %d\n", s->nb_segments);
+    av_log(ctx, AV_LOG_DEBUG, "nb_segments: %d\n", s->nb_segments[selir]);
 
     switch (s->format) {
     case AV_SAMPLE_FMT_FLTP:
@@ -356,12 +362,12 @@ skip:
 
             get_power_float(ctx, s, nb_taps, ch, time);
 
-            for (int n = 0; n < s->nb_segments; n++) {
-                AudioFIRSegment *seg = &s->seg[n];
+            for (int n = 0; n < s->nb_segments[selir]; n++) {
+                AudioFIRSegment *seg = &s->seg[selir][n];
 
-                if (!seg->coeff[selir])
-                    seg->coeff[selir] = ff_get_audio_buffer(ctx->inputs[0], seg->nb_partitions * seg->coeff_size * 2);
-                if (!seg->coeff[selir])
+                if (!seg->coeff)
+                    seg->coeff = ff_get_audio_buffer(ctx->inputs[0], seg->nb_partitions * seg->coeff_size * 2);
+                if (!seg->coeff)
                     return AVERROR(ENOMEM);
 
                 for (int i = 0; i < seg->nb_partitions; i++)
@@ -379,12 +385,12 @@ skip:
                 time[i] = 0;
 
             get_power_double(ctx, s, nb_taps, ch, time);
-            for (int n = 0; n < s->nb_segments; n++) {
-                AudioFIRSegment *seg = &s->seg[n];
+            for (int n = 0; n < s->nb_segments[selir]; n++) {
+                AudioFIRSegment *seg = &s->seg[selir][n];
 
-                if (!seg->coeff[selir])
-                    seg->coeff[selir] = ff_get_audio_buffer(ctx->inputs[0], seg->nb_partitions * seg->coeff_size * 2);
-                if (!seg->coeff[selir])
+                if (!seg->coeff)
+                    seg->coeff = ff_get_audio_buffer(ctx->inputs[0], seg->nb_partitions * seg->coeff_size * 2);
+                if (!seg->coeff)
                     return AVERROR(ENOMEM);
 
                 for (int i = 0; i < seg->nb_partitions; i++)
@@ -449,12 +455,23 @@ static int activate(AVFilterContext *ctx)
             return ret;
     }
 
+    if (s->selir != s->prev_selir && s->loading[0] <= 0) {
+        for (int ch = 0; ch < s->nb_channels; ch++)
+            s->loading[ch] = s->max_offset[s->selir] + s->min_part_size;
+    }
+
     available = ff_inlink_queued_samples(ctx->inputs[0]);
     wanted = FFMAX(s->min_part_size, (available / s->min_part_size) * s->min_part_size);
     ret = ff_inlink_consume_samples(ctx->inputs[0], wanted, wanted, &in);
     if (ret > 0)
         ret = fir_frame(s, in, outlink);
 
+    if (s->selir != s->prev_selir && s->loading[0] <= 0) {
+        s->prev_selir = s->selir;
+        for (int ch = 0; ch < s->nb_channels; ch++)
+            s->loading[ch] = 0;
+    }
+
     if (ret < 0)
         return ret;
 
@@ -571,6 +588,9 @@ FF_ENABLE_DEPRECATION_WARNINGS
 
     s->format = outlink->format;
     s->nb_channels = outlink->ch_layout.nb_channels;
+    s->loading = av_calloc(ctx->inputs[0]->ch_layout.nb_channels, sizeof(*s->loading));
+    if (!s->loading)
+        return AVERROR(ENOMEM);
 
     return 0;
 }
@@ -579,12 +599,13 @@ static av_cold void uninit(AVFilterContext *ctx)
 {
     AudioFIRContext *s = ctx->priv;
 
-    for (int i = 0; i < s->nb_segments; i++)
-        uninit_segment(ctx, &s->seg[i]);
-
     av_freep(&s->fdsp);
+    av_freep(&s->loading);
 
     for (int i = 0; i < s->nb_irs; i++) {
+        for (int j = 0; j < s->nb_segments[i]; j++)
+            uninit_segment(ctx, &s->seg[i][j]);
+
         av_frame_free(&s->ir[i]);
         av_frame_free(&s->norm_ir[i]);
     }
@@ -689,15 +710,8 @@ static int process_command(AVFilterContext *ctx,
         return ret;
 
     s->selir = FFMIN(s->nb_irs - 1, s->selir);
-    if (s->selir != prev_selir) {
+    if (s->selir != prev_selir)
         s->prev_selir = prev_selir;
-        for (int n = 0; n < s->nb_segments; n++) {
-            AudioFIRSegment *seg = &s->seg[n];
-
-            for (int ch = 0; ch < s->nb_channels; ch++)
-                seg->loading[ch] = 0;
-        }
-    }
 
     return 0;
 }
diff --git a/libavfilter/af_afir.h b/libavfilter/af_afir.h
index a9f6d217f4..11564d5b12 100644
--- a/libavfilter/af_afir.h
+++ b/libavfilter/af_afir.h
@@ -39,7 +39,6 @@ typedef struct AudioFIRSegment {
     int input_size;
     int input_offset;
 
-    int *loading;
     int *output_offset;
     int *part_index;
 
@@ -49,7 +48,7 @@ typedef struct AudioFIRSegment {
     AVFrame *tempin;
     AVFrame *tempout;
     AVFrame *buffer;
-    AVFrame *coeff[MAX_IR_STREAMS];
+    AVFrame *coeff;
     AVFrame *input;
     AVFrame *output;
 
@@ -82,11 +81,13 @@ typedef struct AudioFIRContext {
     int eof_coeffs[MAX_IR_STREAMS];
     int have_coeffs[MAX_IR_STREAMS];
     int nb_taps[MAX_IR_STREAMS];
+    int nb_segments[MAX_IR_STREAMS];
+    int max_offset[MAX_IR_STREAMS];
     int nb_channels;
     int one2many;
+    int *loading;
 
-    AudioFIRSegment seg[1024];
-    int nb_segments;
+    AudioFIRSegment seg[MAX_IR_STREAMS][1024];
 
     AVFrame *in;
     AVFrame *ir[MAX_IR_STREAMS];
diff --git a/libavfilter/afir_template.c b/libavfilter/afir_template.c
index 3f3778c675..63d95de900 100644
--- a/libavfilter/afir_template.c
+++ b/libavfilter/afir_template.c
@@ -251,7 +251,7 @@ static void fn(convert_channel)(AVFilterContext *ctx, AudioFIRContext *s, int ch
     ftype *time = (ftype *)s->norm_ir[selir]->extended_data[ch];
     ftype *tempin = (ftype *)seg->tempin->extended_data[ch];
     ftype *tempout = (ftype *)seg->tempout->extended_data[ch];
-    ctype *coeff = (ctype *)seg->coeff[selir]->extended_data[ch];
+    ctype *coeff = (ctype *)seg->coeff->extended_data[ch];
     const int remaining = nb_taps - (seg->input_offset + coeff_partition * seg->part_size);
     const int size = remaining >= seg->part_size ? seg->part_size : remaining;
 
@@ -285,19 +285,18 @@ static void fn(fir_fadd)(AudioFIRContext *s, ftype *dst, const ftype *src, int n
     }
 }
 
-static int fn(fir_quantum)(AVFilterContext *ctx, AVFrame *out, int ch, int offset)
+static int fn(fir_quantum)(AVFilterContext *ctx, AVFrame *out, int ch, int offset, int selir)
 {
     AudioFIRContext *s = ctx->priv;
     const ftype *in = (const ftype *)s->in->extended_data[ch] + offset;
     ftype *blockout, *ptr = (ftype *)out->extended_data[ch] + offset;
     const int min_part_size = s->min_part_size;
     const int nb_samples = FFMIN(min_part_size, out->nb_samples - offset);
-    const int nb_segments = s->nb_segments;
+    const int nb_segments = s->nb_segments[selir];
     const float dry_gain = s->dry_gain;
-    const int selir = s->selir;
 
     for (int segment = 0; segment < nb_segments; segment++) {
-        AudioFIRSegment *seg = &s->seg[segment];
+        AudioFIRSegment *seg = &s->seg[selir][segment];
         ftype *src = (ftype *)seg->input->extended_data[ch];
         ftype *dst = (ftype *)seg->output->extended_data[ch];
         ftype *sumin = (ftype *)seg->sumin->extended_data[ch];
@@ -337,70 +336,18 @@ static int fn(fir_quantum)(AVFilterContext *ctx, AVFrame *out, int ch, int offse
 
         memset(sumin, 0, sizeof(*sumin) * seg->fft_length);
 
-        if (seg->loading[ch] < nb_partitions) {
-            j = seg->part_index[ch] <= 0 ? nb_partitions - 1 : seg->part_index[ch] - 1;
-            for (int i = 0; i < nb_partitions; i++) {
-                const int input_partition = j;
-                const int coeff_partition = i;
-                const int coffset = coeff_partition * seg->coeff_size;
-                const ftype *blockout = (const ftype *)seg->blockout->extended_data[ch] + input_partition * seg->block_size;
-                const ctype *coeff = ((const ctype *)seg->coeff[selir]->extended_data[ch]) + coffset;
-
-                if (j == 0)
-                    j = nb_partitions;
-                j--;
-
-#if DEPTH == 32
-                s->afirdsp.fcmul_add(sumin, blockout, (const ftype *)coeff, part_size);
-#else
-                s->afirdsp.dcmul_add(sumin, blockout, (const ftype *)coeff, part_size);
-#endif
-            }
-
-            seg->itx_fn(seg->itx[ch], sumout, sumin, sizeof(ctype));
-            memcpy(dst + part_size, sumout + part_size, part_size * sizeof(*buf));
-            memset(sumin, 0, sizeof(*sumin) * seg->fft_length);
-        }
-
         blockout = (ftype *)seg->blockout->extended_data[ch] + seg->part_index[ch] * seg->block_size;
         memset(tempin + part_size, 0, sizeof(*tempin) * (seg->block_size - part_size));
         memcpy(tempin, src, sizeof(*src) * part_size);
         seg->tx_fn(seg->tx[ch], blockout, tempin, sizeof(ftype));
 
-        if (seg->loading[ch] < nb_partitions) {
-            const int selir = s->prev_selir;
-
-            j = seg->part_index[ch];
-            for (int i = 0; i < nb_partitions; i++) {
-                const int input_partition = j;
-                const int coeff_partition = i;
-                const int coffset = coeff_partition * seg->coeff_size;
-                const ftype *blockout = (const ftype *)seg->blockout->extended_data[ch] + input_partition * seg->block_size;
-                const ctype *coeff = ((const ctype *)seg->coeff[selir]->extended_data[ch]) + coffset;
-
-                if (j == 0)
-                    j = nb_partitions;
-                j--;
-
-#if DEPTH == 32
-                s->afirdsp.fcmul_add(sumin, blockout, (const ftype *)coeff, part_size);
-#else
-                s->afirdsp.dcmul_add(sumin, blockout, (const ftype *)coeff, part_size);
-#endif
-            }
-
-            seg->itx_fn(seg->itx[ch], sumout, sumin, sizeof(ctype));
-            memcpy(dst + 2 * part_size, sumout, 2 * part_size * sizeof(*dst));
-            memset(sumin, 0, sizeof(*sumin) * seg->fft_length);
-        }
-
         j = seg->part_index[ch];
         for (int i = 0; i < nb_partitions; i++) {
             const int input_partition = j;
             const int coeff_partition = i;
             const int coffset = coeff_partition * seg->coeff_size;
             const ftype *blockout = (const ftype *)seg->blockout->extended_data[ch] + input_partition * seg->block_size;
-            const ctype *coeff = ((const ctype *)seg->coeff[selir]->extended_data[ch]) + coffset;
+            const ctype *coeff = ((const ctype *)seg->coeff->extended_data[ch]) + coffset;
 
             if (j == 0)
                 j = nb_partitions;
@@ -415,45 +362,40 @@ static int fn(fir_quantum)(AVFilterContext *ctx, AVFrame *out, int ch, int offse
 
         seg->itx_fn(seg->itx[ch], sumout, sumin, sizeof(ctype));
 
-        if (seg->loading[ch] < nb_partitions) {
-            ftype *ptr1 = dst + part_size;
-            ftype *ptr2 = dst + part_size * 2;
-            ftype *ptr3 = dst + part_size * 3;
-            ftype *ptr4 = dst + part_size * 4;
-            if (seg->loading[ch] == 0)
-                memcpy(ptr4, buf, sizeof(*ptr4) * part_size);
-            for (int n = 0; n < part_size; n++)
-                ptr2[n] += ptr4[n];
-
-            if (seg->loading[ch] < nb_partitions - 1)
-                memcpy(ptr4, ptr3, part_size * sizeof(*dst));
-            for (int n = 0; n < part_size; n++)
-                ptr1[n] += sumout[n];
-
-            if (seg->loading[ch] == nb_partitions - 1)
-                memcpy(buf, sumout + part_size, part_size * sizeof(*buf));
-
-            for (int i = 0; i < part_size; i++) {
-                const ftype factor = (part_size * seg->loading[ch] + i) / (ftype)(part_size * nb_partitions);
-                const ftype ifactor = 1 - factor;
-                dst[i] = ptr1[i] * factor + ptr2[i] * ifactor;
+        fn(fir_fadd)(s, buf, sumout, part_size);
+        memcpy(dst, buf, part_size * sizeof(*dst));
+        memcpy(buf, sumout + part_size, part_size * sizeof(*buf));
+
+        if (s->selir != s->prev_selir) {
+            if (selir == s->selir) {
+                if (s->loading[ch] <= min_part_size) {
+                    for (int n = 0; n < nb_samples; n++)
+                        ptr[n] += dst[n] * ((n + 1.f) / nb_samples);
+                }
+            } else {
+                fn(fir_fadd)(s, ptr, dst, nb_samples);
             }
         } else {
-            fn(fir_fadd)(s, buf, sumout, part_size);
-            memcpy(dst, buf, part_size * sizeof(*dst));
-            memcpy(buf, sumout + part_size, part_size * sizeof(*buf));
+            fn(fir_fadd)(s, ptr, dst, nb_samples);
         }
 
-        fn(fir_fadd)(s, ptr, dst, nb_samples);
-
         if (part_size != min_part_size)
             memmove(src, src + min_part_size, (seg->input_size - min_part_size) * sizeof(*src));
 
         seg->part_index[ch] = (seg->part_index[ch] + 1) % nb_partitions;
-        if (seg->loading[ch] < nb_partitions)
-            seg->loading[ch]++;
     }
 
+    if (selir != s->selir) {
+        if (s->loading[ch] <= min_part_size) {
+            for (int n = 0; n < nb_samples; n++)
+                ptr[n] *= (nb_samples - n * 1.f) / nb_samples;
+        }
+        return 0;
+    }
+
+    if (s->selir != s->prev_selir)
+        s->loading[ch] -= min_part_size;
+
     if (s->wet_gain == 1.f)
         return 0;