[FFmpeg-cvslog] avfilter/vf_fftdnoiz: add slice threading support
Paul B Mahol
git at videolan.org
Mon Oct 18 19:06:18 EEST 2021
ffmpeg | branch: master | Paul B Mahol <onemda at gmail.com> | Mon Oct 18 18:03:16 2021 +0200| [374f2ac370e1c109fde950613d539049c18fd974] | committer: Paul B Mahol
avfilter/vf_fftdnoiz: add slice threading support
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=374f2ac370e1c109fde950613d539049c18fd974
---
libavfilter/vf_fftdnoiz.c | 236 ++++++++++++++++++++++++++++++----------------
1 file changed, 154 insertions(+), 82 deletions(-)
diff --git a/libavfilter/vf_fftdnoiz.c b/libavfilter/vf_fftdnoiz.c
index e316d4c756..487e98b931 100644
--- a/libavfilter/vf_fftdnoiz.c
+++ b/libavfilter/vf_fftdnoiz.c
@@ -25,6 +25,8 @@
#include "internal.h"
#include "libavcodec/avfft.h"
+#define MAX_THREADS 32
+
enum BufferTypes {
CURRENT,
PREV,
@@ -40,11 +42,9 @@ typedef struct PlaneContext {
float n;
float *buffer[BSIZE];
- FFTComplex *hdata, *vdata;
+ FFTComplex *hdata[MAX_THREADS], *vdata[MAX_THREADS];
int data_linesize;
int buffer_linesize;
-
- FFTContext *fft, *ifft;
} PlaneContext;
typedef struct FFTdnoizContext {
@@ -62,8 +62,11 @@ typedef struct FFTdnoizContext {
int depth;
int nb_planes;
+ int nb_threads;
PlaneContext planes[4];
+ FFTContext *fft[MAX_THREADS], *ifft[MAX_THREADS];
+
void (*import_row)(FFTComplex *dst, uint8_t *src, int rw);
void (*export_row)(FFTComplex *src, uint8_t *dst, int rw, float scale, int depth);
} FFTdnoizContext;
@@ -90,23 +93,6 @@ static const AVOption fftdnoiz_options[] = {
AVFILTER_DEFINE_CLASS(fftdnoiz);
-static av_cold int init(AVFilterContext *ctx)
-{
- FFTdnoizContext *s = ctx->priv;
- int i;
-
- for (i = 0; i < 4; i++) {
- PlaneContext *p = &s->planes[i];
-
- p->fft = av_fft_init(s->block_bits, 0);
- p->ifft = av_fft_init(s->block_bits, 1);
- if (!p->fft || !p->ifft)
- return AVERROR(ENOMEM);
- }
-
- return 0;
-}
-
static const enum AVPixelFormat pix_fmts[] = {
AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9,
AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12,
@@ -201,6 +187,14 @@ static int config_input(AVFilterLink *inlink)
s->planes[0].planeheight = s->planes[3].planeheight = inlink->h;
s->nb_planes = av_pix_fmt_count_planes(inlink->format);
+ s->nb_threads = FFMIN(ff_filter_get_nb_threads(ctx), MAX_THREADS);
+
+ for (int i = 0; i < s->nb_threads; i++) {
+ s->fft[i] = av_fft_init(s->block_bits, 0);
+ s->ifft[i] = av_fft_init(s->block_bits, 1);
+ if (!s->fft[i] || !s->ifft[i])
+ return AVERROR(ENOMEM);
+ }
for (i = 0; i < s->nb_planes; i++) {
PlaneContext *p = &s->planes[i];
@@ -230,10 +224,12 @@ static int config_input(AVFilterLink *inlink)
return AVERROR(ENOMEM);
}
p->data_linesize = 2 * p->b * sizeof(float);
- p->hdata = av_calloc(p->b, p->data_linesize);
- p->vdata = av_calloc(p->b, p->data_linesize);
- if (!p->hdata || !p->vdata)
- return AVERROR(ENOMEM);
+ for (int j = 0; j < s->nb_threads; j++) {
+ p->hdata[j] = av_calloc(p->b, p->data_linesize);
+ p->vdata[j] = av_calloc(p->b, p->data_linesize);
+ if (!p->hdata[j] || !p->vdata[j])
+ return AVERROR(ENOMEM);
+ }
}
return 0;
@@ -241,7 +237,8 @@ static int config_input(AVFilterLink *inlink)
static void import_plane(FFTdnoizContext *s,
uint8_t *srcp, int src_linesize,
- float *buffer, int buffer_linesize, int plane)
+ float *buffer, int buffer_linesize, int plane,
+ int jobnr, int nb_jobs)
{
PlaneContext *p = &s->planes[plane];
const int width = p->planewidth;
@@ -253,12 +250,14 @@ static void import_plane(FFTdnoizContext *s,
const int noy = p->noy;
const int bpp = (s->depth + 7) / 8;
const int data_linesize = p->data_linesize / sizeof(FFTComplex);
- FFTComplex *hdata = p->hdata;
- FFTComplex *vdata = p->vdata;
+ const int slice_start = (noy * jobnr) / nb_jobs;
+ const int slice_end = (noy * (jobnr+1)) / nb_jobs;
+ FFTComplex *hdata = p->hdata[jobnr];
+ FFTComplex *vdata = p->vdata[jobnr];
int x, y, i, j;
buffer_linesize /= sizeof(float);
- for (y = 0; y < noy; y++) {
+ for (y = slice_start; y < slice_end; y++) {
for (x = 0; x < nox; x++) {
const int rh = FFMIN(block, height - y * size);
const int rw = FFMIN(block, width - x * size);
@@ -272,8 +271,8 @@ static void import_plane(FFTdnoizContext *s,
dst[j].re = dst[block - j - 1].re;
dst[j].im = 0;
}
- av_fft_permute(p->fft, dst);
- av_fft_calc(p->fft, dst);
+ av_fft_permute(s->fft[jobnr], dst);
+ av_fft_calc(s->fft[jobnr], dst);
src += src_linesize;
dst += data_linesize;
@@ -292,8 +291,8 @@ static void import_plane(FFTdnoizContext *s,
for (i = 0; i < block; i++) {
for (j = 0; j < block; j++)
dst[j] = ssrc[j * data_linesize + i];
- av_fft_permute(p->fft, dst);
- av_fft_calc(p->fft, dst);
+ av_fft_permute(s->fft[jobnr], dst);
+ av_fft_calc(s->fft[jobnr], dst);
memcpy(bdst, dst, block * sizeof(FFTComplex));
dst += data_linesize;
@@ -305,7 +304,8 @@ static void import_plane(FFTdnoizContext *s,
static void export_plane(FFTdnoizContext *s,
uint8_t *dstp, int dst_linesize,
- float *buffer, int buffer_linesize, int plane)
+ float *buffer, int buffer_linesize, int plane,
+ int jobnr, int nb_jobs)
{
PlaneContext *p = &s->planes[plane];
const int depth = s->depth;
@@ -320,12 +320,14 @@ static void export_plane(FFTdnoizContext *s,
const int noy = p->noy;
const int data_linesize = p->data_linesize / sizeof(FFTComplex);
const float scale = 1.f / (block * block);
- FFTComplex *hdata = p->hdata;
- FFTComplex *vdata = p->vdata;
+ const int slice_start = (noy * jobnr) / nb_jobs;
+ const int slice_end = (noy * (jobnr+1)) / nb_jobs;
+ FFTComplex *hdata = p->hdata[jobnr];
+ FFTComplex *vdata = p->vdata[jobnr];
int x, y, i, j;
buffer_linesize /= sizeof(float);
- for (y = 0; y < noy; y++) {
+ for (y = slice_start; y < slice_end; y++) {
for (x = 0; x < nox; x++) {
const int woff = x == 0 ? 0 : hoverlap;
const int hoff = y == 0 ? 0 : hoverlap;
@@ -338,8 +340,8 @@ static void export_plane(FFTdnoizContext *s,
hdst = hdata;
for (i = 0; i < block; i++) {
memcpy(ddst, bsrc, block * sizeof(FFTComplex));
- av_fft_permute(p->ifft, ddst);
- av_fft_calc(p->ifft, ddst);
+ av_fft_permute(s->ifft[jobnr], ddst);
+ av_fft_calc(s->ifft[jobnr], ddst);
for (j = 0; j < block; j++) {
hdst[j * data_linesize + i] = ddst[j];
}
@@ -350,8 +352,8 @@ static void export_plane(FFTdnoizContext *s,
hdst = hdata + hoff * data_linesize;
for (i = 0; i < rh; i++) {
- av_fft_permute(p->ifft, hdst);
- av_fft_calc(p->ifft, hdst);
+ av_fft_permute(s->ifft[jobnr], hdst);
+ av_fft_calc(s->ifft[jobnr], hdst);
s->export_row(hdst + woff, dst, rw, scale, depth);
hdst += data_linesize;
@@ -361,7 +363,8 @@ static void export_plane(FFTdnoizContext *s,
}
}
-static void filter_plane3d2(FFTdnoizContext *s, int plane, float *pbuffer, float *nbuffer)
+static void filter_plane3d2(FFTdnoizContext *s, int plane, float *pbuffer, float *nbuffer,
+ int jobnr, int nb_jobs)
{
PlaneContext *p = &s->planes[plane];
const int block = p->b;
@@ -369,13 +372,15 @@ static void filter_plane3d2(FFTdnoizContext *s, int plane, float *pbuffer, float
const int noy = p->noy;
const int buffer_linesize = p->buffer_linesize / sizeof(float);
const float sigma = s->sigma * s->sigma * block * block;
+ const int slice_start = (noy * jobnr) / nb_jobs;
+ const int slice_end = (noy * (jobnr+1)) / nb_jobs;
const float limit = 1.f - s->amount;
float *cbuffer = p->buffer[CURRENT];
const float cfactor = sqrtf(3.f) * 0.5f;
const float scale = 1.f / 3.f;
int y, x, i, j;
- for (y = 0; y < noy; y++) {
+ for (y = slice_start; y < slice_end; y++) {
for (x = 0; x < nox; x++) {
float *cbuff = cbuffer + buffer_linesize * y * block + x * block * 2;
float *pbuff = pbuffer + buffer_linesize * y * block + x * block * 2;
@@ -421,7 +426,8 @@ static void filter_plane3d2(FFTdnoizContext *s, int plane, float *pbuffer, float
}
}
-static void filter_plane3d1(FFTdnoizContext *s, int plane, float *pbuffer)
+static void filter_plane3d1(FFTdnoizContext *s, int plane, float *pbuffer,
+ int jobnr, int nb_jobs)
{
PlaneContext *p = &s->planes[plane];
const int block = p->b;
@@ -429,11 +435,13 @@ static void filter_plane3d1(FFTdnoizContext *s, int plane, float *pbuffer)
const int noy = p->noy;
const int buffer_linesize = p->buffer_linesize / sizeof(float);
const float sigma = s->sigma * s->sigma * block * block;
+ const int slice_start = (noy * jobnr) / nb_jobs;
+ const int slice_end = (noy * (jobnr+1)) / nb_jobs;
const float limit = 1.f - s->amount;
float *cbuffer = p->buffer[CURRENT];
int y, x, i, j;
- for (y = 0; y < noy; y++) {
+ for (y = slice_start; y < slice_end; y++) {
for (x = 0; x < nox; x++) {
float *cbuff = cbuffer + buffer_linesize * y * block + x * block * 2;
float *pbuff = pbuffer + buffer_linesize * y * block + x * block * 2;
@@ -473,7 +481,8 @@ static void filter_plane3d1(FFTdnoizContext *s, int plane, float *pbuffer)
}
}
-static void filter_plane2d(FFTdnoizContext *s, int plane)
+static void filter_plane2d(FFTdnoizContext *s, int plane,
+ int jobnr, int nb_jobs)
{
PlaneContext *p = &s->planes[plane];
const int block = p->b;
@@ -482,15 +491,16 @@ static void filter_plane2d(FFTdnoizContext *s, int plane)
const int buffer_linesize = p->buffer_linesize / 4;
const float sigma = s->sigma * s->sigma * block * block;
const float limit = 1.f - s->amount;
+ const int slice_start = (noy * jobnr) / nb_jobs;
+ const int slice_end = (noy * (jobnr+1)) / nb_jobs;
float *buffer = p->buffer[CURRENT];
- int y, x, i, j;
- for (y = 0; y < noy; y++) {
- for (x = 0; x < nox; x++) {
+ for (int y = slice_start; y < slice_end; y++) {
+ for (int x = 0; x < nox; x++) {
float *buff = buffer + buffer_linesize * y * block + x * block * 2;
- for (i = 0; i < block; i++) {
- for (j = 0; j < block; j++) {
+ for (int i = 0; i < block; i++) {
+ for (int j = 0; j < block; j++) {
float factor, power, re, im;
re = buff[j * 2 ];
@@ -507,6 +517,79 @@ static void filter_plane2d(FFTdnoizContext *s, int plane)
}
}
+/**
+ * Slice-threaded job: call import_plane() on this job's share of every
+ * processed plane, once per frame that is currently held (next, previous,
+ * current), filling buffer[NEXT], buffer[PREV] and buffer[CURRENT].
+ *
+ * @param ctx     filter context; ctx->priv is the FFTdnoizContext
+ * @param arg     unused
+ * @param jobnr   index of this job
+ * @param nb_jobs total number of jobs the work is split into
+ * @return always 0
+ */
+static int import_pass(AVFilterContext *ctx, void *arg,
+                       int jobnr, int nb_jobs)
+{
+    FFTdnoizContext *s = ctx->priv;
+
+    for (int plane = 0; plane < s->nb_planes; plane++) {
+        PlaneContext *pc = &s->planes[plane];
+        const int linesize = pc->buffer_linesize;
+
+        /* Nothing to import when ctx->is_disabled is set or the plane's
+         * bit is not set in the planesf mask. */
+        if (ctx->is_disabled || !(s->planesf & (1 << plane)))
+            continue;
+
+        if (s->next)
+            import_plane(s, s->next->data[plane], s->next->linesize[plane],
+                         pc->buffer[NEXT], linesize, plane,
+                         jobnr, nb_jobs);
+
+        if (s->prev)
+            import_plane(s, s->prev->data[plane], s->prev->linesize[plane],
+                         pc->buffer[PREV], linesize, plane,
+                         jobnr, nb_jobs);
+
+        /* The current frame is imported unconditionally. */
+        import_plane(s, s->cur->data[plane], s->cur->linesize[plane],
+                     pc->buffer[CURRENT], linesize, plane,
+                     jobnr, nb_jobs);
+    }
+
+    return 0;
+}
+
+/**
+ * Slice-threaded job: run the denoising step on this job's share of every
+ * processed plane, picking the variant that matches which neighbouring
+ * frames are available.
+ *
+ * @param ctx     filter context; ctx->priv is the FFTdnoizContext
+ * @param arg     unused
+ * @param jobnr   index of this job
+ * @param nb_jobs total number of jobs the work is split into
+ * @return always 0
+ */
+static int filter_pass(AVFilterContext *ctx, void *arg,
+                       int jobnr, int nb_jobs)
+{
+    FFTdnoizContext *s = ctx->priv;
+    const int have_prev = !!s->prev;
+    const int have_next = !!s->next;
+
+    for (int plane = 0; plane < s->nb_planes; plane++) {
+        PlaneContext *pc = &s->planes[plane];
+
+        /* Skip planes when ctx->is_disabled is set or the plane's bit is
+         * not set in the planesf mask. */
+        if (ctx->is_disabled || !(s->planesf & (1 << plane)))
+            continue;
+
+        if (have_next && have_prev) {
+            /* Both neighbours: filter across three frames. */
+            filter_plane3d2(s, plane, pc->buffer[PREV], pc->buffer[NEXT],
+                            jobnr, nb_jobs);
+        } else if (have_next || have_prev) {
+            /* Exactly one neighbour: filter across two frames. */
+            filter_plane3d1(s, plane, pc->buffer[have_next ? NEXT : PREV],
+                            jobnr, nb_jobs);
+        } else {
+            /* No neighbours: filter the current frame on its own. */
+            filter_plane2d(s, plane, jobnr, nb_jobs);
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Slice-threaded job: call export_plane() on this job's share of every
+ * processed plane, running the inverse transform on buffer[CURRENT] and
+ * writing the result into the output frame.
+ *
+ * @param ctx     filter context; ctx->priv is the FFTdnoizContext
+ * @param arg     the output AVFrame to write into
+ * @param jobnr   index of this job; also selects the per-job scratch
+ *                buffers and inverse FFT context used by export_plane()
+ * @param nb_jobs total number of jobs the work is split into
+ * @return always 0
+ */
+static int export_pass(AVFilterContext *ctx, void *arg,
+                       int jobnr, int nb_jobs)
+{
+    FFTdnoizContext *s = ctx->priv;
+    AVFrame *out = arg;
+
+    for (int plane = 0; plane < s->nb_planes; plane++) {
+        PlaneContext *p = &s->planes[plane];
+
+        /* import_pass() and filter_pass() skip these planes, so
+         * buffer[CURRENT] was not refreshed for this frame. Exporting it
+         * would overwrite the output plane with stale data. */
+        if (!((1 << plane) & s->planesf) || ctx->is_disabled)
+            continue;
+
+        export_plane(s, out->data[plane], out->linesize[plane],
+                     p->buffer[CURRENT], p->buffer_linesize, plane,
+                     jobnr, nb_jobs);
+    }
+
+    return 0;
+}
+
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
AVFilterContext *ctx = inlink->dst;
@@ -559,6 +642,15 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
av_frame_copy_props(out, s->cur);
}
+ ff_filter_execute(ctx, import_pass, NULL, NULL,
+ FFMIN(s->planes[0].noy, s->nb_threads));
+
+ ff_filter_execute(ctx, filter_pass, NULL, NULL,
+ FFMIN(s->planes[0].noy, s->nb_threads));
+
+ ff_filter_execute(ctx, export_pass, out, NULL,
+ FFMIN(s->planes[0].noy, s->nb_threads));
+
for (plane = 0; plane < s->nb_planes; plane++) {
PlaneContext *p = &s->planes[plane];
@@ -569,32 +661,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
p->planewidth, p->planeheight);
continue;
}
-
- if (s->next) {
- import_plane(s, s->next->data[plane], s->next->linesize[plane],
- p->buffer[NEXT], p->buffer_linesize, plane);
- }
-
- if (s->prev) {
- import_plane(s, s->prev->data[plane], s->prev->linesize[plane],
- p->buffer[PREV], p->buffer_linesize, plane);
- }
-
- import_plane(s, s->cur->data[plane], s->cur->linesize[plane],
- p->buffer[CURRENT], p->buffer_linesize, plane);
-
- if (s->next && s->prev) {
- filter_plane3d2(s, plane, p->buffer[PREV], p->buffer[NEXT]);
- } else if (s->next) {
- filter_plane3d1(s, plane, p->buffer[NEXT]);
- } else if (s->prev) {
- filter_plane3d1(s, plane, p->buffer[PREV]);
- } else {
- filter_plane2d(s, plane);
- }
-
- export_plane(s, out->data[plane], out->linesize[plane],
- p->buffer[CURRENT], p->buffer_linesize, plane);
}
if (s->nb_next == 0 && s->nb_prev == 0) {
@@ -644,13 +710,19 @@ static av_cold void uninit(AVFilterContext *ctx)
for (i = 0; i < 4; i++) {
PlaneContext *p = &s->planes[i];
- av_freep(&p->hdata);
- av_freep(&p->vdata);
+ for (int j = 0; j < s->nb_threads; j++) {
+ av_freep(&p->hdata[j]);
+ av_freep(&p->vdata[j]);
+ }
+
av_freep(&p->buffer[PREV]);
av_freep(&p->buffer[CURRENT]);
av_freep(&p->buffer[NEXT]);
- av_fft_end(p->fft);
- av_fft_end(p->ifft);
+ }
+
+ for (i = 0; i < s->nb_threads; i++) {
+ av_fft_end(s->fft[i]);
+ av_fft_end(s->ifft[i]);
}
av_frame_free(&s->prev);
@@ -679,11 +751,11 @@ const AVFilter ff_vf_fftdnoiz = {
.name = "fftdnoiz",
.description = NULL_IF_CONFIG_SMALL("Denoise frames using 3D FFT."),
.priv_size = sizeof(FFTdnoizContext),
- .init = init,
.uninit = uninit,
FILTER_INPUTS(fftdnoiz_inputs),
FILTER_OUTPUTS(fftdnoiz_outputs),
FILTER_PIXFMTS_ARRAY(pix_fmts),
.priv_class = &fftdnoiz_class,
- .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
+ .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
+ AVFILTER_FLAG_SLICE_THREADS,
};
More information about the ffmpeg-cvslog mailing list