[FFmpeg-cvslog] avfilter/vf_convolve: implement slice threading

Paul B Mahol git at videolan.org
Mon Dec 25 13:01:00 EET 2017


ffmpeg | branch: master | Paul B Mahol <onemda at gmail.com> | Mon Dec 25 11:53:54 2017 +0100| [c59e49f9b2f30184e6d74255f3c1b2a1a89c6520] | committer: Paul B Mahol

avfilter/vf_convolve: implement slice threading

Signed-off-by: Paul B Mahol <onemda at gmail.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c59e49f9b2f30184e6d74255f3c1b2a1a89c6520
---

 libavfilter/vf_convolve.c | 142 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 100 insertions(+), 42 deletions(-)

diff --git a/libavfilter/vf_convolve.c b/libavfilter/vf_convolve.c
index e3d0c5fa2a..de58cdff8f 100644
--- a/libavfilter/vf_convolve.c
+++ b/libavfilter/vf_convolve.c
@@ -29,12 +29,14 @@
 #include "internal.h"
 #include "video.h"
 
+#define MAX_THREADS 16
+
 typedef struct ConvolveContext {
     const AVClass *class;
     FFFrameSync fs;
 
-    FFTContext *fft[4];
-    FFTContext *ifft[4];
+    FFTContext *fft[4][MAX_THREADS];
+    FFTContext *ifft[4][MAX_THREADS];
 
     int fft_bits[4];
     int fft_len[4];
@@ -152,15 +154,28 @@ static int config_input_impulse(AVFilterLink *inlink)
     return 0;
 }
 
-static void fft_horizontal(ConvolveContext *s, FFTComplex *fft_hdata,
-                           int n, int plane)
+typedef struct ThreadData {
+    FFTComplex *hdata, *vdata;
+    int plane, n;
+} ThreadData;
+
+static int fft_horizontal(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
+    ConvolveContext *s = ctx->priv;
+    ThreadData *td = arg;
+    FFTComplex *hdata = td->hdata;
+    const int plane = td->plane;
+    const int n = td->n;
+    int start = (n *  jobnr   ) / nb_jobs;
+    int end = (n * (jobnr+1)) / nb_jobs;
     int y;
 
-    for (y = 0; y < n; y++) {
-        av_fft_permute(s->fft[plane], fft_hdata + y * n);
-        av_fft_calc(s->fft[plane], fft_hdata + y * n);
+    for (y = start; y < end; y++) {
+        av_fft_permute(s->fft[plane][jobnr], hdata + y * n);
+        av_fft_calc(s->fft[plane][jobnr], hdata + y * n);
     }
+
+    return 0;
 }
 
 static void get_input(ConvolveContext *s, FFTComplex *fft_hdata,
@@ -238,46 +253,73 @@ static void get_input(ConvolveContext *s, FFTComplex *fft_hdata,
     }
 }
 
-static void fft_vertical(ConvolveContext *s, FFTComplex *fft_hdata, FFTComplex *fft_vdata,
-                         int n, int plane)
+static int fft_vertical(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
+    ConvolveContext *s = ctx->priv;
+    ThreadData *td = arg;
+    FFTComplex *hdata = td->hdata;
+    FFTComplex *vdata = td->vdata;
+    const int plane = td->plane;
+    const int n = td->n;
+    int start = (n *  jobnr   ) / nb_jobs;
+    int end = (n * (jobnr+1)) / nb_jobs;
     int y, x;
 
-    for (y = 0; y < n; y++) {
+    for (y = start; y < end; y++) {
         for (x = 0; x < n; x++) {
-            fft_vdata[y * n + x].re = fft_hdata[x * n + y].re;
-            fft_vdata[y * n + x].im = fft_hdata[x * n + y].im;
+            vdata[y * n + x].re = hdata[x * n + y].re;
+            vdata[y * n + x].im = hdata[x * n + y].im;
         }
 
-        av_fft_permute(s->fft[plane], fft_vdata + y * n);
-        av_fft_calc(s->fft[plane], fft_vdata + y * n);
+        av_fft_permute(s->fft[plane][jobnr], vdata + y * n);
+        av_fft_calc(s->fft[plane][jobnr], vdata + y * n);
     }
+
+    return 0;
 }
 
-static void ifft_vertical(ConvolveContext *s, int n, int plane)
+static int ifft_vertical(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
+    ConvolveContext *s = ctx->priv;
+    ThreadData *td = arg;
+    FFTComplex *hdata = td->hdata;
+    FFTComplex *vdata = td->vdata;
+    const int plane = td->plane;
+    const int n = td->n;
+    int start = (n *  jobnr   ) / nb_jobs;
+    int end = (n * (jobnr+1)) / nb_jobs;
     int y, x;
 
-    for (y = 0; y < n; y++) {
-        av_fft_permute(s->ifft[plane], s->fft_vdata[plane] + y * n);
-        av_fft_calc(s->ifft[plane], s->fft_vdata[plane] + y * n);
+    for (y = start; y < end; y++) {
+        av_fft_permute(s->ifft[plane][jobnr], vdata + y * n);
+        av_fft_calc(s->ifft[plane][jobnr], vdata + y * n);
 
         for (x = 0; x < n; x++) {
-            s->fft_hdata[plane][x * n + y].re = s->fft_vdata[plane][y * n + x].re;
-            s->fft_hdata[plane][x * n + y].im = s->fft_vdata[plane][y * n + x].im;
+            hdata[x * n + y].re = vdata[y * n + x].re;
+            hdata[x * n + y].im = vdata[y * n + x].im;
         }
     }
+
+    return 0;
 }
 
-static void ifft_horizontal(ConvolveContext *s, int n, int plane)
+static int ifft_horizontal(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
-    FFTComplex *input = s->fft_hdata[plane];
+    ConvolveContext *s = ctx->priv;
+    ThreadData *td = arg;
+    FFTComplex *hdata = td->hdata;
+    const int plane = td->plane;
+    const int n = td->n;
+    int start = (n *  jobnr   ) / nb_jobs;
+    int end = (n * (jobnr+1)) / nb_jobs;
     int y;
 
-    for (y = 0; y < n; y++) {
-        av_fft_permute(s->ifft[plane], input + y * n);
-        av_fft_calc(s->ifft[plane], input + y * n);
+    for (y = start; y < end; y++) {
+        av_fft_permute(s->ifft[plane][jobnr], hdata + y * n);
+        av_fft_calc(s->ifft[plane][jobnr], hdata + y * n);
     }
+
+    return 0;
 }
 
 static void get_output(ConvolveContext *s, AVFrame *out,
@@ -356,15 +398,20 @@ static int do_convolve(FFFrameSync *fs)
         const int w = s->planewidth[plane];
         const int h = s->planeheight[plane];
         float total = 0;
+        ThreadData td;
 
         if (!(s->planes & (1 << plane))) {
             continue;
         }
 
+        td.plane = plane, td.n = n;
         get_input(s, s->fft_hdata[plane], mainpic, w, h, n, plane, 1.f);
-        fft_horizontal(s, s->fft_hdata[plane], n, plane);
-        fft_vertical(s, s->fft_hdata[plane], s->fft_vdata[plane],
-                     n, plane);
+
+        td.hdata = s->fft_hdata[plane];
+        td.vdata = s->fft_vdata[plane];
+
+        ctx->internal->execute(ctx, fft_horizontal, &td, NULL, FFMIN3(MAX_THREADS, n, ff_filter_get_nb_threads(ctx)));
+        ctx->internal->execute(ctx, fft_vertical, &td, NULL, FFMIN3(MAX_THREADS, n, ff_filter_get_nb_threads(ctx)));
 
         if ((!s->impulse && !s->got_impulse[plane]) || s->impulse) {
             if (s->depth == 8) {
@@ -385,9 +432,12 @@ static int do_convolve(FFFrameSync *fs)
             total = FFMAX(1, total);
 
             get_input(s, s->fft_hdata_impulse[plane], impulsepic, w, h, n, plane, 1 / total);
-            fft_horizontal(s, s->fft_hdata_impulse[plane], n, plane);
-            fft_vertical(s, s->fft_hdata_impulse[plane], s->fft_vdata_impulse[plane],
-                         n, plane);
+
+            td.hdata = s->fft_hdata_impulse[plane];
+            td.vdata = s->fft_vdata_impulse[plane];
+
+            ctx->internal->execute(ctx, fft_horizontal, &td, NULL, FFMIN3(MAX_THREADS, n, ff_filter_get_nb_threads(ctx)));
+            ctx->internal->execute(ctx, fft_vertical, &td, NULL, FFMIN3(MAX_THREADS, n, ff_filter_get_nb_threads(ctx)));
 
             s->got_impulse[plane] = 1;
         }
@@ -408,8 +458,11 @@ static int do_convolve(FFFrameSync *fs)
             }
         }
 
-        ifft_vertical(s, n, plane);
-        ifft_horizontal(s, n, plane);
+        td.hdata = s->fft_hdata[plane];
+        td.vdata = s->fft_vdata[plane];
+
+        ctx->internal->execute(ctx, ifft_vertical, &td, NULL, FFMIN3(MAX_THREADS, n, ff_filter_get_nb_threads(ctx)));
+        ctx->internal->execute(ctx, ifft_horizontal, &td, NULL, FFMIN3(MAX_THREADS, n, ff_filter_get_nb_threads(ctx)));
         get_output(s, mainpic, w, h, n, plane);
     }
 
@@ -421,7 +474,7 @@ static int config_output(AVFilterLink *outlink)
     AVFilterContext *ctx = outlink->src;
     ConvolveContext *s = ctx->priv;
     AVFilterLink *mainlink = ctx->inputs[0];
-    int ret, i;
+    int ret, i, j;
 
     s->fs.on_event = do_convolve;
     ret = ff_framesync_init_dualinput(&s->fs, ctx);
@@ -437,10 +490,12 @@ static int config_output(AVFilterLink *outlink)
         return ret;
 
     for (i = 0; i < s->nb_planes; i++) {
-        s->fft[i]  = av_fft_init(s->fft_bits[i], 0);
-        s->ifft[i] = av_fft_init(s->fft_bits[i], 1);
-        if (!s->fft[i] || !s->ifft[i])
-            return AVERROR(ENOMEM);
+        for (j = 0; j < MAX_THREADS; j++) {
+            s->fft[i][j]  = av_fft_init(s->fft_bits[i], 0);
+            s->ifft[i][j] = av_fft_init(s->fft_bits[i], 1);
+            if (!s->fft[i][j] || !s->ifft[i][j])
+                return AVERROR(ENOMEM);
+        }
     }
 
     return 0;
@@ -455,15 +510,18 @@ static int activate(AVFilterContext *ctx)
 static av_cold void uninit(AVFilterContext *ctx)
 {
     ConvolveContext *s = ctx->priv;
-    int i;
+    int i, j;
 
     for (i = 0; i < 4; i++) {
         av_freep(&s->fft_hdata[i]);
         av_freep(&s->fft_vdata[i]);
         av_freep(&s->fft_hdata_impulse[i]);
         av_freep(&s->fft_vdata_impulse[i]);
-        av_fft_end(s->fft[i]);
-        av_fft_end(s->ifft[i]);
+
+        for (j = 0; j < MAX_THREADS; j++) {
+            av_fft_end(s->fft[i][j]);
+            av_fft_end(s->ifft[i][j]);
+        }
     }
 
     ff_framesync_uninit(&s->fs);
@@ -502,5 +560,5 @@ AVFilter ff_vf_convolve = {
     .priv_class    = &convolve_class,
     .inputs        = convolve_inputs,
     .outputs       = convolve_outputs,
-    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | AVFILTER_FLAG_SLICE_THREADS,
 };



More information about the ffmpeg-cvslog mailing list