[FFmpeg-cvslog] avfilter/vf_fftdnoiz: add slice threading support

Paul B Mahol git at videolan.org
Mon Oct 18 19:06:18 EEST 2021


ffmpeg | branch: master | Paul B Mahol <onemda at gmail.com> | Mon Oct 18 18:03:16 2021 +0200| [374f2ac370e1c109fde950613d539049c18fd974] | committer: Paul B Mahol

avfilter/vf_fftdnoiz: add slice threading support

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=374f2ac370e1c109fde950613d539049c18fd974
---

 libavfilter/vf_fftdnoiz.c | 236 ++++++++++++++++++++++++++++++----------------
 1 file changed, 154 insertions(+), 82 deletions(-)

diff --git a/libavfilter/vf_fftdnoiz.c b/libavfilter/vf_fftdnoiz.c
index e316d4c756..487e98b931 100644
--- a/libavfilter/vf_fftdnoiz.c
+++ b/libavfilter/vf_fftdnoiz.c
@@ -25,6 +25,8 @@
 #include "internal.h"
 #include "libavcodec/avfft.h"
 
+#define MAX_THREADS 32
+
 enum BufferTypes {
     CURRENT,
     PREV,
@@ -40,11 +42,9 @@ typedef struct PlaneContext {
     float n;
 
     float *buffer[BSIZE];
-    FFTComplex *hdata, *vdata;
+    FFTComplex *hdata[MAX_THREADS], *vdata[MAX_THREADS];
     int data_linesize;
     int buffer_linesize;
-
-    FFTContext *fft, *ifft;
 } PlaneContext;
 
 typedef struct FFTdnoizContext {
@@ -62,8 +62,11 @@ typedef struct FFTdnoizContext {
 
     int depth;
     int nb_planes;
+    int nb_threads;
     PlaneContext planes[4];
 
+    FFTContext *fft[MAX_THREADS], *ifft[MAX_THREADS];
+
     void (*import_row)(FFTComplex *dst, uint8_t *src, int rw);
     void (*export_row)(FFTComplex *src, uint8_t *dst, int rw, float scale, int depth);
 } FFTdnoizContext;
@@ -90,23 +93,6 @@ static const AVOption fftdnoiz_options[] = {
 
 AVFILTER_DEFINE_CLASS(fftdnoiz);
 
-static av_cold int init(AVFilterContext *ctx)
-{
-    FFTdnoizContext *s = ctx->priv;
-    int i;
-
-    for (i = 0; i < 4; i++) {
-        PlaneContext *p = &s->planes[i];
-
-        p->fft  = av_fft_init(s->block_bits, 0);
-        p->ifft = av_fft_init(s->block_bits, 1);
-        if (!p->fft || !p->ifft)
-            return AVERROR(ENOMEM);
-    }
-
-    return 0;
-}
-
 static const enum AVPixelFormat pix_fmts[] = {
     AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9,
     AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12,
@@ -201,6 +187,14 @@ static int config_input(AVFilterLink *inlink)
     s->planes[0].planeheight = s->planes[3].planeheight = inlink->h;
 
     s->nb_planes = av_pix_fmt_count_planes(inlink->format);
+    s->nb_threads = FFMIN(ff_filter_get_nb_threads(ctx), MAX_THREADS);
+
+    for (int i = 0; i < s->nb_threads; i++) {
+        s->fft[i]  = av_fft_init(s->block_bits, 0);
+        s->ifft[i] = av_fft_init(s->block_bits, 1);
+        if (!s->fft[i] || !s->ifft[i])
+            return AVERROR(ENOMEM);
+    }
 
     for (i = 0; i < s->nb_planes; i++) {
         PlaneContext *p = &s->planes[i];
@@ -230,10 +224,12 @@ static int config_input(AVFilterLink *inlink)
                 return AVERROR(ENOMEM);
         }
         p->data_linesize = 2 * p->b * sizeof(float);
-        p->hdata = av_calloc(p->b, p->data_linesize);
-        p->vdata = av_calloc(p->b, p->data_linesize);
-        if (!p->hdata || !p->vdata)
-            return AVERROR(ENOMEM);
+        for (int j = 0; j < s->nb_threads; j++) {
+            p->hdata[j] = av_calloc(p->b, p->data_linesize);
+            p->vdata[j] = av_calloc(p->b, p->data_linesize);
+            if (!p->hdata[j] || !p->vdata[j])
+                return AVERROR(ENOMEM);
+        }
     }
 
     return 0;
@@ -241,7 +237,8 @@ static int config_input(AVFilterLink *inlink)
 
 static void import_plane(FFTdnoizContext *s,
                          uint8_t *srcp, int src_linesize,
-                         float *buffer, int buffer_linesize, int plane)
+                         float *buffer, int buffer_linesize, int plane,
+                         int jobnr, int nb_jobs)
 {
     PlaneContext *p = &s->planes[plane];
     const int width = p->planewidth;
@@ -253,12 +250,14 @@ static void import_plane(FFTdnoizContext *s,
     const int noy = p->noy;
     const int bpp = (s->depth + 7) / 8;
     const int data_linesize = p->data_linesize / sizeof(FFTComplex);
-    FFTComplex *hdata = p->hdata;
-    FFTComplex *vdata = p->vdata;
+    const int slice_start = (noy * jobnr) / nb_jobs;
+    const int slice_end = (noy * (jobnr+1)) / nb_jobs;
+    FFTComplex *hdata = p->hdata[jobnr];
+    FFTComplex *vdata = p->vdata[jobnr];
     int x, y, i, j;
 
     buffer_linesize /= sizeof(float);
-    for (y = 0; y < noy; y++) {
+    for (y = slice_start; y < slice_end; y++) {
         for (x = 0; x < nox; x++) {
             const int rh = FFMIN(block, height - y * size);
             const int rw = FFMIN(block, width  - x * size);
@@ -272,8 +271,8 @@ static void import_plane(FFTdnoizContext *s,
                     dst[j].re = dst[block - j - 1].re;
                     dst[j].im = 0;
                 }
-                av_fft_permute(p->fft, dst);
-                av_fft_calc(p->fft, dst);
+                av_fft_permute(s->fft[jobnr], dst);
+                av_fft_calc(s->fft[jobnr], dst);
 
                 src += src_linesize;
                 dst += data_linesize;
@@ -292,8 +291,8 @@ static void import_plane(FFTdnoizContext *s,
             for (i = 0; i < block; i++) {
                 for (j = 0; j < block; j++)
                     dst[j] = ssrc[j * data_linesize + i];
-                av_fft_permute(p->fft, dst);
-                av_fft_calc(p->fft, dst);
+                av_fft_permute(s->fft[jobnr], dst);
+                av_fft_calc(s->fft[jobnr], dst);
                 memcpy(bdst, dst, block * sizeof(FFTComplex));
 
                 dst += data_linesize;
@@ -305,7 +304,8 @@ static void import_plane(FFTdnoizContext *s,
 
 static void export_plane(FFTdnoizContext *s,
                          uint8_t *dstp, int dst_linesize,
-                         float *buffer, int buffer_linesize, int plane)
+                         float *buffer, int buffer_linesize, int plane,
+                         int jobnr, int nb_jobs)
 {
     PlaneContext *p = &s->planes[plane];
     const int depth = s->depth;
@@ -320,12 +320,14 @@ static void export_plane(FFTdnoizContext *s,
     const int noy = p->noy;
     const int data_linesize = p->data_linesize / sizeof(FFTComplex);
     const float scale = 1.f / (block * block);
-    FFTComplex *hdata = p->hdata;
-    FFTComplex *vdata = p->vdata;
+    const int slice_start = (noy * jobnr) / nb_jobs;
+    const int slice_end = (noy * (jobnr+1)) / nb_jobs;
+    FFTComplex *hdata = p->hdata[jobnr];
+    FFTComplex *vdata = p->vdata[jobnr];
     int x, y, i, j;
 
     buffer_linesize /= sizeof(float);
-    for (y = 0; y < noy; y++) {
+    for (y = slice_start; y < slice_end; y++) {
         for (x = 0; x < nox; x++) {
             const int woff = x == 0 ? 0 : hoverlap;
             const int hoff = y == 0 ? 0 : hoverlap;
@@ -338,8 +340,8 @@ static void export_plane(FFTdnoizContext *s,
             hdst = hdata;
             for (i = 0; i < block; i++) {
                 memcpy(ddst, bsrc, block * sizeof(FFTComplex));
-                av_fft_permute(p->ifft, ddst);
-                av_fft_calc(p->ifft, ddst);
+                av_fft_permute(s->ifft[jobnr], ddst);
+                av_fft_calc(s->ifft[jobnr], ddst);
                 for (j = 0; j < block; j++) {
                     hdst[j * data_linesize + i] = ddst[j];
                 }
@@ -350,8 +352,8 @@ static void export_plane(FFTdnoizContext *s,
 
             hdst = hdata + hoff * data_linesize;
             for (i = 0; i < rh; i++) {
-                av_fft_permute(p->ifft, hdst);
-                av_fft_calc(p->ifft, hdst);
+                av_fft_permute(s->ifft[jobnr], hdst);
+                av_fft_calc(s->ifft[jobnr], hdst);
                 s->export_row(hdst + woff, dst, rw, scale, depth);
 
                 hdst += data_linesize;
@@ -361,7 +363,8 @@ static void export_plane(FFTdnoizContext *s,
     }
 }
 
-static void filter_plane3d2(FFTdnoizContext *s, int plane, float *pbuffer, float *nbuffer)
+static void filter_plane3d2(FFTdnoizContext *s, int plane, float *pbuffer, float *nbuffer,
+                            int jobnr, int nb_jobs)
 {
     PlaneContext *p = &s->planes[plane];
     const int block = p->b;
@@ -369,13 +372,15 @@ static void filter_plane3d2(FFTdnoizContext *s, int plane, float *pbuffer, float
     const int noy = p->noy;
     const int buffer_linesize = p->buffer_linesize / sizeof(float);
     const float sigma = s->sigma * s->sigma * block * block;
+    const int slice_start = (noy * jobnr) / nb_jobs;
+    const int slice_end = (noy * (jobnr+1)) / nb_jobs;
     const float limit = 1.f - s->amount;
     float *cbuffer = p->buffer[CURRENT];
     const float cfactor = sqrtf(3.f) * 0.5f;
     const float scale = 1.f / 3.f;
     int y, x, i, j;
 
-    for (y = 0; y < noy; y++) {
+    for (y = slice_start; y < slice_end; y++) {
         for (x = 0; x < nox; x++) {
             float *cbuff = cbuffer + buffer_linesize * y * block + x * block * 2;
             float *pbuff = pbuffer + buffer_linesize * y * block + x * block * 2;
@@ -421,7 +426,8 @@ static void filter_plane3d2(FFTdnoizContext *s, int plane, float *pbuffer, float
     }
 }
 
-static void filter_plane3d1(FFTdnoizContext *s, int plane, float *pbuffer)
+static void filter_plane3d1(FFTdnoizContext *s, int plane, float *pbuffer,
+                            int jobnr, int nb_jobs)
 {
     PlaneContext *p = &s->planes[plane];
     const int block = p->b;
@@ -429,11 +435,13 @@ static void filter_plane3d1(FFTdnoizContext *s, int plane, float *pbuffer)
     const int noy = p->noy;
     const int buffer_linesize = p->buffer_linesize / sizeof(float);
     const float sigma = s->sigma * s->sigma * block * block;
+    const int slice_start = (noy * jobnr) / nb_jobs;
+    const int slice_end = (noy * (jobnr+1)) / nb_jobs;
     const float limit = 1.f - s->amount;
     float *cbuffer = p->buffer[CURRENT];
     int y, x, i, j;
 
-    for (y = 0; y < noy; y++) {
+    for (y = slice_start; y < slice_end; y++) {
         for (x = 0; x < nox; x++) {
             float *cbuff = cbuffer + buffer_linesize * y * block + x * block * 2;
             float *pbuff = pbuffer + buffer_linesize * y * block + x * block * 2;
@@ -473,7 +481,8 @@ static void filter_plane3d1(FFTdnoizContext *s, int plane, float *pbuffer)
     }
 }
 
-static void filter_plane2d(FFTdnoizContext *s, int plane)
+static void filter_plane2d(FFTdnoizContext *s, int plane,
+                           int jobnr, int nb_jobs)
 {
     PlaneContext *p = &s->planes[plane];
     const int block = p->b;
@@ -482,15 +491,16 @@ static void filter_plane2d(FFTdnoizContext *s, int plane)
     const int buffer_linesize = p->buffer_linesize / 4;
     const float sigma = s->sigma * s->sigma * block * block;
     const float limit = 1.f - s->amount;
+    const int slice_start = (noy * jobnr) / nb_jobs;
+    const int slice_end = (noy * (jobnr+1)) / nb_jobs;
     float *buffer = p->buffer[CURRENT];
-    int y, x, i, j;
 
-    for (y = 0; y < noy; y++) {
-        for (x = 0; x < nox; x++) {
+    for (int y = slice_start; y < slice_end; y++) {
+        for (int x = 0; x < nox; x++) {
             float *buff = buffer + buffer_linesize * y * block + x * block * 2;
 
-            for (i = 0; i < block; i++) {
-                for (j = 0; j < block; j++) {
+            for (int i = 0; i < block; i++) {
+                for (int j = 0; j < block; j++) {
                     float factor, power, re, im;
 
                     re = buff[j * 2    ];
@@ -507,6 +517,79 @@ static void filter_plane2d(FFTdnoizContext *s, int plane)
     }
 }
 
+static int import_pass(AVFilterContext *ctx, void *arg,
+                       int jobnr, int nb_jobs)
+{
+    FFTdnoizContext *s = ctx->priv;
+    /* Slice job: import this job's share of each selected plane; arg is unused. */
+    for (int plane = 0; plane < s->nb_planes; plane++) {
+        PlaneContext *p = &s->planes[plane];
+        /* Skip planes not set in the planesf bitmask, or all when is_disabled. */
+        if (!((1 << plane) & s->planesf) || ctx->is_disabled)
+            continue;
+        /* The next/previous frames are imported only when they are present. */
+        if (s->next) {
+            import_plane(s, s->next->data[plane], s->next->linesize[plane],
+                         p->buffer[NEXT], p->buffer_linesize, plane,
+                         jobnr, nb_jobs);
+        }
+
+        if (s->prev) {
+            import_plane(s, s->prev->data[plane], s->prev->linesize[plane],
+                         p->buffer[PREV], p->buffer_linesize, plane,
+                         jobnr, nb_jobs);
+        }
+        /* The current frame is always imported, into buffer[CURRENT]. */
+        import_plane(s, s->cur->data[plane], s->cur->linesize[plane],
+                     p->buffer[CURRENT], p->buffer_linesize, plane,
+                     jobnr, nb_jobs);
+    }
+
+    return 0;
+}
+
+static int filter_pass(AVFilterContext *ctx, void *arg,
+                       int jobnr, int nb_jobs)
+{
+    FFTdnoizContext *s = ctx->priv;
+    /* Slice job: filter this job's share of each selected plane; arg is unused. */
+    for (int plane = 0; plane < s->nb_planes; plane++) {
+        PlaneContext *p = &s->planes[plane];
+        /* Skip planes not set in the planesf bitmask, or all when is_disabled. */
+        if (!((1 << plane) & s->planesf) || ctx->is_disabled)
+            continue;
+        /* Choose the variant by which neighbour frames exist: both, one, none. */
+        if (s->next && s->prev) {
+            filter_plane3d2(s, plane, p->buffer[PREV], p->buffer[NEXT], jobnr, nb_jobs);
+        } else if (s->next) {
+            filter_plane3d1(s, plane, p->buffer[NEXT], jobnr, nb_jobs);
+        } else  if (s->prev) {
+            filter_plane3d1(s, plane, p->buffer[PREV], jobnr, nb_jobs);
+        } else {
+            filter_plane2d(s, plane, jobnr, nb_jobs);
+        }
+    }
+
+    return 0;
+}
+
+static int export_pass(AVFilterContext *ctx, void *arg,
+                       int jobnr, int nb_jobs)
+{
+    FFTdnoizContext *s = ctx->priv;
+    AVFrame *out = arg; /* destination frame handed over by the caller */
+    /* Slice job: export this job's share of each selected plane into out. */
+    for (int plane = 0; plane < s->nb_planes; plane++) {
+        PlaneContext *p = &s->planes[plane];
+        if (!((1 << plane) & s->planesf) || ctx->is_disabled)
+            continue; /* never imported this frame: buffer[CURRENT] is stale */
+        export_plane(s, out->data[plane], out->linesize[plane],
+                     p->buffer[CURRENT], p->buffer_linesize, plane,
+                     jobnr, nb_jobs);
+    }
+
+    return 0;
+}
+
 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 {
     AVFilterContext *ctx = inlink->dst;
@@ -559,6 +642,15 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
         av_frame_copy_props(out, s->cur);
     }
 
+    ff_filter_execute(ctx, import_pass, NULL, NULL,
+                      FFMIN(s->planes[0].noy, s->nb_threads));
+
+    ff_filter_execute(ctx, filter_pass, NULL, NULL,
+                      FFMIN(s->planes[0].noy, s->nb_threads));
+
+    ff_filter_execute(ctx, export_pass, out, NULL,
+                      FFMIN(s->planes[0].noy, s->nb_threads));
+
     for (plane = 0; plane < s->nb_planes; plane++) {
         PlaneContext *p = &s->planes[plane];
 
@@ -569,32 +661,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
                                     p->planewidth, p->planeheight);
             continue;
         }
-
-        if (s->next) {
-            import_plane(s, s->next->data[plane], s->next->linesize[plane],
-                         p->buffer[NEXT], p->buffer_linesize, plane);
-        }
-
-        if (s->prev) {
-            import_plane(s, s->prev->data[plane], s->prev->linesize[plane],
-                         p->buffer[PREV], p->buffer_linesize, plane);
-        }
-
-        import_plane(s, s->cur->data[plane], s->cur->linesize[plane],
-                     p->buffer[CURRENT], p->buffer_linesize, plane);
-
-        if (s->next && s->prev) {
-            filter_plane3d2(s, plane, p->buffer[PREV], p->buffer[NEXT]);
-        } else if (s->next) {
-            filter_plane3d1(s, plane, p->buffer[NEXT]);
-        } else  if (s->prev) {
-            filter_plane3d1(s, plane, p->buffer[PREV]);
-        } else {
-            filter_plane2d(s, plane);
-        }
-
-        export_plane(s, out->data[plane], out->linesize[plane],
-                     p->buffer[CURRENT], p->buffer_linesize, plane);
     }
 
     if (s->nb_next == 0 && s->nb_prev == 0) {
@@ -644,13 +710,19 @@ static av_cold void uninit(AVFilterContext *ctx)
     for (i = 0; i < 4; i++) {
         PlaneContext *p = &s->planes[i];
 
-        av_freep(&p->hdata);
-        av_freep(&p->vdata);
+        for (int j = 0; j < s->nb_threads; j++) {
+            av_freep(&p->hdata[j]);
+            av_freep(&p->vdata[j]);
+        }
+
         av_freep(&p->buffer[PREV]);
         av_freep(&p->buffer[CURRENT]);
         av_freep(&p->buffer[NEXT]);
-        av_fft_end(p->fft);
-        av_fft_end(p->ifft);
+    }
+
+    for (i = 0; i < s->nb_threads; i++) {
+        av_fft_end(s->fft[i]);
+        av_fft_end(s->ifft[i]);
     }
 
     av_frame_free(&s->prev);
@@ -679,11 +751,11 @@ const AVFilter ff_vf_fftdnoiz = {
     .name          = "fftdnoiz",
     .description   = NULL_IF_CONFIG_SMALL("Denoise frames using 3D FFT."),
     .priv_size     = sizeof(FFTdnoizContext),
-    .init          = init,
     .uninit        = uninit,
     FILTER_INPUTS(fftdnoiz_inputs),
     FILTER_OUTPUTS(fftdnoiz_outputs),
     FILTER_PIXFMTS_ARRAY(pix_fmts),
     .priv_class    = &fftdnoiz_class,
-    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
+                     AVFILTER_FLAG_SLICE_THREADS,
 };



More information about the ffmpeg-cvslog mailing list