[FFmpeg-devel] [PATCH v1] avfilter/vf_guided: support enhanced guided filter
Xuewei Meng
928826483 at qq.com
Wed Aug 25 14:58:45 EEST 2021
From: Xuewei Meng <xwmeng96 at gmail.com>
Add support for the enhanced guided filter and its fast (subsampled) variant.
The enhanced guided filter preserves edges better in denoising tasks.
The box filter is also optimized: window sums are now derived from a summed-area table.
Signed-off-by: Xuewei Meng <xwmeng96 at gmail.com>
---
doc/filters.texi | 14 +-
libavfilter/vf_guided.c | 331 ++++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 315 insertions(+), 30 deletions(-)
diff --git a/doc/filters.texi b/doc/filters.texi
index f8d99b7..c65a347 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -13093,13 +13093,13 @@ Set regularization parameter (with square).
Allowed range is 0 to 1. Default is 0.01.
@item mode
-Set filter mode. Can be @code{basic} or @code{fast}.
+Set filter mode. Can be @code{basic}, @code{basic fast}, @code{enhanced} or @code{enhanced fast}.
Default is @code{basic}.
@item sub
-Set subsampling ratio for @code{fast} mode.
+Set subsampling ratio for @code{basic fast} or @code{enhanced fast} mode.
Range is 2 to 64. Default is 4.
-No subsampling occurs in @code{basic} mode.
+No subsampling occurs in @code{basic} and @code{enhanced} modes.
@item guidance
Set guidance mode. Can be @code{off} or @code{on}. Default is @code{off}.
@@ -13123,6 +13123,14 @@ ffmpeg -i in.png -vf guided out.png
@end example
@item
+Edge-preserving smoothing with enhanced guided filter.
+For the details of enhanced guided filter, refer to paper "Side window guided filtering".
+See: @url{https://www.sciencedirect.com/science/article/abs/pii/S0165168419302798}.
+@example
+ffmpeg -i in.png -vf guided=mode=2 out.png
+@end example
+
+@item
Dehazing, structure-transferring filtering, detail enhancement with guided filter.
For the generation of guidance image, refer to paper "Guided Image Filtering".
See: @url{http://kaiminghe.com/publications/pami12guidedfilter.pdf}.
diff --git a/libavfilter/vf_guided.c b/libavfilter/vf_guided.c
index 4003b95..c030080 100644
--- a/libavfilter/vf_guided.c
+++ b/libavfilter/vf_guided.c
@@ -30,7 +30,9 @@
enum FilterModes {
BASIC,
- FAST,
+ FAST_BASIC, // basic filter run with the "sub" subsampling ratio (takes the slot of the removed FAST)
+ ENHANCED, // side-window ("enhanced") filter; config_input forces sub = 1
+ FAST_ENHANCED, // side-window ("enhanced") filter run with the "sub" subsampling ratio
NB_MODES,
};
@@ -40,6 +42,18 @@ enum GuidanceModes {
NB_GUIDANCE_MODES,
};
+/* Side windows evaluated by the enhanced filter. The values are used as
+ * indices into the left/right/up/down offset tables in box_enhanced_slice(),
+ * so the order here has to match the order of those tables. */
+enum SideWindowModes {
+ LEFT, // left half of the box: columns j-radius..j, rows i-radius..i+radius
+ RIGHT, // right half of the box: columns j..j+radius, rows i-radius..i+radius
+ UP, // upper half of the box: columns j-radius..j+radius, rows i-radius..i
+ DOWN, // lower half of the box: columns j-radius..j+radius, rows i..i+radius
+ NW, // North west: upper-left quadrant (columns j-radius..j, rows i-radius..i)
+ NE, // North east: upper-right quadrant (columns j..j+radius, rows i-radius..i)
+ SW, // South west: lower-left quadrant (columns j-radius..j, rows i..i+radius)
+ SE, // South east: lower-right quadrant (columns j..j+radius, rows i..i+radius)
+ NB_SIDEWINDOWMODES // number of side windows; used as the loop bound
+};
+
typedef struct GuidedContext {
const AVClass *class;
FFFrameSync fs;
@@ -60,6 +74,7 @@ typedef struct GuidedContext {
int planeheight[4];
int (*box_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
+ int (*box_enhanced_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
} GuidedContext;
#define OFFSET(x) offsetof(GuidedContext, x)
@@ -68,9 +83,12 @@ typedef struct GuidedContext {
static const AVOption guided_options[] = {
{ "radius", "set the box radius", OFFSET(radius), AV_OPT_TYPE_INT, {.i64 = 3 }, 1, 20, FLAGS },
{ "eps", "set the regularization parameter (with square)", OFFSET(eps), AV_OPT_TYPE_FLOAT, {.dbl = 0.01 }, 0.0, 1, FLAGS },
- { "mode", "set filtering mode (0: basic mode; 1: fast mode)", OFFSET(mode), AV_OPT_TYPE_INT, {.i64 = BASIC}, BASIC, NB_MODES - 1, FLAGS, "mode" },
+    /* The numbers in the help text are the FilterModes enum values
+     * (BASIC = 0, FAST_BASIC = 1, ENHANCED = 2, FAST_ENHANCED = 3). */
+    { "mode", "set filtering mode (0: basic mode; 1: fast basic mode; 2: enhanced mode; 3: fast enhanced mode)",
+        OFFSET(mode), AV_OPT_TYPE_INT, {.i64 = BASIC }, BASIC, NB_MODES - 1, FLAGS, "mode" },
{ "basic", "basic guided filter", 0, AV_OPT_TYPE_CONST, {.i64 = BASIC}, 0, 0, FLAGS, "mode" },
- { "fast", "fast guided filter", 0, AV_OPT_TYPE_CONST, {.i64 = FAST }, 0, 0, FLAGS, "mode" },
+    /* NOTE(review): the constant names below contain a space, and the old
+     * "fast" name is dropped, so an existing "mode=fast" no longer resolves
+     * to a named constant -- confirm this is intended. */
+    { "basic fast", "fast basic guided filter", 0, AV_OPT_TYPE_CONST, {.i64 = FAST_BASIC }, 0, 0, FLAGS, "mode" },
+    { "enhanced", "enhanced guided filter", 0, AV_OPT_TYPE_CONST, {.i64 = ENHANCED }, 0, 0, FLAGS, "mode" },
+    { "enhanced fast", "fast enhanced guided filter", 0, AV_OPT_TYPE_CONST, {.i64 = FAST_ENHANCED }, 0, 0, FLAGS, "mode" },
{ "sub", "subsampling ratio for fast mode", OFFSET(sub), AV_OPT_TYPE_INT, {.i64 = 4 }, 2, 64, FLAGS },
{ "guidance", "set guidance mode (0: off mode; 1: on mode)", OFFSET(guidance), AV_OPT_TYPE_INT, {.i64 = OFF }, OFF, NB_GUIDANCE_MODES - 1, FLAGS, "guidance" },
{ "off", "only one input is enabled", 0, AV_OPT_TYPE_CONST, {.i64 = OFF }, 0, 0, FLAGS, "guidance" },
@@ -88,6 +106,7 @@ typedef struct ThreadData {
float *dst;
int srcStride;
int dstStride;
+ int sideWindowMode;
} ThreadData;
+/**
+ * Slice worker: mean (box) filter over a float plane.
+ *
+ * Builds a summed-area table covering the rows of this job extended by up to
+ * `radius` rows above and below, then obtains every window sum from at most
+ * four table lookups. Windows are clipped at the plane borders and the sum is
+ * divided by the number of pixels that are actually inside the window.
+ *
+ * @param arg ThreadData describing the source and destination planes
+ * @return 0 on success, AVERROR(ENOMEM) if the scratch table cannot be allocated
+ */
static int box_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
@@ -104,26 +123,131 @@ static int box_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
const int radius = s->radius;
const float *src = t->src;
float *dst = t->dst;
+    int numPix;
+    int temp_start, temp_end;
+    float *temp;
+    /* Rows covered by the table, clamped to the plane. */
+    temp_start = (slice_start - radius) < 0 ? 0 : (slice_start - radius);
+    temp_end = (slice_end + radius) >= height ? (height - 1) : (slice_end + radius);
+    /* NOTE(review): the table accumulates in float; on large planes the
+     * running sums may lose precision -- consider double, TODO confirm. */
+    temp = av_calloc(width * (temp_end - temp_start + 1), sizeof(float));
+    /* av_calloc() can fail; never dereference a NULL table. */
+    if (!temp)
+        return AVERROR(ENOMEM);
+
+    /* First table row: running sum along the row. */
+    temp[0] = src[temp_start * src_stride];
+    for (int j = 1;j < width;j++) {
+        temp[j] = src[temp_start * src_stride + j] + temp[j - 1];
+    }
+    /* First table column: running sum down the column. */
+    for (int i = temp_start + 1;i <= temp_end;i++) {
+        int idx = (i - temp_start) * width;
+        temp[idx] = src[i * src_stride] + temp[idx - width];
+    }
+    /* Remaining entries: above + left - above-left + current sample. */
+    for (int i = temp_start + 1;i <= temp_end;i++) {
+        for (int j = 1;j < width;j++) {
+            int idx = (i - temp_start) * width + j;
+            int idx1 = idx - width;
+            int idx2 = idx - 1;
+            int idx3 = idx1 - 1;
+            temp[idx] = temp[idx1] + temp[idx2] - temp[idx3] + src[i * src_stride + j];
+        }
+    }
- int w;
+    for (int i = slice_start;i < slice_end;i++) {
+        /* x_* are row bounds and y_* are column bounds of the clipped window. */
+        int x_start = i - radius;
+        int x_end = i + radius;
+        x_start = (x_start < 0) ? 0 : x_start;
+        x_end = (x_end >= height) ? height - 1 : x_end;
+        for (int j = 0;j < width;j++) {
+            int y_start = j - radius;
+            int y_end = j + radius;
+            int idx = i * dst_stride + j;
+            y_start = (y_start < 0) ? 0 : y_start;
+            y_end = (y_end >= width) ? width - 1 : y_end;
+
+            numPix = (x_end - x_start + 1) * (y_end - y_start + 1);
+            /* Window sum = D - B - C + A on the table; terms that would fall
+             * before the first table row or column are skipped. */
+            dst[idx] = temp[(x_end - temp_start) * width + y_end];
+            if ((x_start - 1 - temp_start) >= 0) {
+                dst[idx] -= temp[(x_start - 1 - temp_start) * width + y_end];
+            }
+            if ((y_start - 1) >= 0) {
+                dst[idx] -= temp[(x_end - temp_start) * width + y_start - 1];
+                if ((x_start - 1 - temp_start) >= 0) {
+                    dst[idx] += temp[(x_start - 1 - temp_start) * width + y_start - 1];
+                }
+            }
+            dst[idx] /= numPix;
+        }
+    }
+    /* The table is per-call scratch memory; release it so it does not leak
+     * on every job. */
+    av_freep(&temp);
+    return 0;
+}
+
+/**
+ * Slice worker: mean filter over one side window of a float plane.
+ *
+ * Same summed-area-table scheme as box_slice(), but the window is the side
+ * window selected by ThreadData.sideWindowMode instead of the full box.
+ * Windows are clipped at the plane borders and the sum is divided by the
+ * number of pixels that are actually inside the window.
+ *
+ * @param arg ThreadData describing the planes and the side window to use
+ * @return 0 on success, AVERROR(ENOMEM) if the scratch table cannot be allocated
+ */
+static int box_enhanced_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    GuidedContext *s = ctx->priv;
+    ThreadData *t = arg;
+
+    const int width = t->width;
+    const int height = t->height;
+    const int src_stride = t->srcStride;
+    const int dst_stride = t->dstStride;
+    const int sideMode = t->sideWindowMode;
+    const int slice_start = (height * jobnr) / nb_jobs;
+    const int slice_end = (height * (jobnr + 1)) / nb_jobs;
+    const int radius = s->radius;
+    /* Window extents relative to the current pixel, indexed by
+     * enum SideWindowModes (LEFT, RIGHT, UP, DOWN, NW, NE, SW, SE). */
+    const int left[] = { -radius, 0, -radius, -radius, -radius, 0, -radius, 0 };
+    const int right[] = { 0, radius, radius, radius, 0, radius, 0, radius };
+    const int up[] = { -radius, -radius, -radius, 0, -radius, -radius, 0, 0 };
+    const int down[] = { radius, radius, 0, radius, 0, 0, radius, radius };
+    const float *src = t->src;
+    float *dst = t->dst;
int numPix;
- w = (radius << 1) + 1;
- numPix = w * w;
+    int temp_start, temp_end;
+    float *temp;
+    /* Rows covered by the table, clamped to the plane. */
+    temp_start = (slice_start + up[sideMode]) < 0 ? 0 : (slice_start + up[sideMode]);
+    temp_end = (slice_end + down[sideMode]) >= height ? (height - 1) : (slice_end + down[sideMode]);
+    temp = av_calloc(width * (temp_end - temp_start + 1), sizeof(float));
+    /* av_calloc() can fail; never dereference a NULL table. */
+    if (!temp)
+        return AVERROR(ENOMEM);
+
+    /* First table row: running sum along the row. */
+    temp[0] = src[temp_start * src_stride];
+    for (int j = 1;j < width;j++) {
+        temp[j] = src[temp_start * src_stride + j] + temp[j - 1];
+    }
+    /* First table column: running sum down the column. */
+    for (int i = temp_start + 1;i <= temp_end;i++) {
+        int idx = (i - temp_start) * width;
+        temp[idx] = src[i * src_stride] + temp[idx - width];
+    }
+    /* Remaining entries: above + left - above-left + current sample. */
+    for (int i = temp_start + 1;i <= temp_end;i++) {
+        for (int j = 1;j < width;j++) {
+            int idx = (i - temp_start) * width + j;
+            int idx1 = idx - width;
+            int idx2 = idx - 1;
+            int idx3 = idx1 - 1;
+            temp[idx] = temp[idx1] + temp[idx2] - temp[idx3] + src[i * src_stride + j];
+        }
+    }
+
for (int i = slice_start;i < slice_end;i++) {
- for (int j = 0;j < width;j++) {
- float temp = 0.0;
- for (int row = -radius;row <= radius;row++) {
- for (int col = -radius;col <= radius;col++) {
- int x = i + row;
- int y = j + col;
- x = (x < 0) ? 0 : (x >= height ? height - 1 : x);
- y = (y < 0) ? 0 : (y >= width ? width - 1 : y);
- temp += src[x * src_stride + y];
- }
+        /* x_* are row bounds and y_* are column bounds of the clipped window. */
+        int x_start = i + up[sideMode];
+        int x_end = i + down[sideMode];
+        x_start = (x_start < 0) ? 0 : x_start;
+        x_end = (x_end >= height) ? height - 1 : x_end;
+        for (int j = 0;j < width;j++) {
+            int y_start = j + left[sideMode];
+            int y_end = j + right[sideMode];
+            int idx = i * dst_stride + j;
+            y_start = (y_start < 0) ? 0 : y_start;
+            y_end = (y_end >= width) ? width - 1 : y_end;
+
+            numPix = (x_end - x_start + 1) * (y_end - y_start + 1);
+            /* Window sum = D - B - C + A on the table; terms that would fall
+             * before the first table row or column are skipped. */
+            dst[idx] = temp[(x_end - temp_start) * width + y_end];
+            if ((x_start - 1 - temp_start) >= 0) {
+                dst[idx] -= temp[(x_start - 1 - temp_start) * width + y_end];
+            }
+            if ((y_start - 1) >= 0) {
+                dst[idx] -= temp[(x_end - temp_start) * width + y_start - 1];
+                if ((x_start - 1 - temp_start) >= 0) {
+                    dst[idx] += temp[(x_start - 1 - temp_start) * width + y_start - 1];
+                }
+            }
+            dst[idx] /= numPix;
}
- dst[i * dst_stride + j] = temp / numPix;
- }
}
+
+    /* The table is per-call scratch memory; release it so it does not leak
+     * on every job. */
+    av_freep(&temp);
return 0;
}
@@ -159,10 +283,9 @@ static int config_input(AVFilterLink *inlink)
GuidedContext *s = ctx->priv;
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
- if (s->mode == BASIC) {
+ if (s->mode == BASIC || s->mode == ENHANCED) {
s->sub = 1;
- }
- else if (s->mode == FAST) {
+ } else if (s->mode == FAST_BASIC || s->mode == FAST_ENHANCED) {
if (s->radius >= s->sub)
s->radius = s->radius / s->sub;
else {
@@ -181,6 +304,7 @@ static int config_input(AVFilterLink *inlink)
s->nb_planes = av_pix_fmt_count_planes(inlink->format);
s->box_slice = box_slice;
+ s->box_enhanced_slice = box_enhanced_slice;
return 0;
}
@@ -304,6 +428,142 @@ end:
GUIDED(uint8_t, byte)
GUIDED(uint16_t, word)
+/**
+ * Generate guided_enhanced_<name>(): side-window ("enhanced") guided filter
+ * for one plane of `type` samples.
+ *
+ * For each of the NB_SIDEWINDOWMODES side windows the guided-filter
+ * coefficients are computed with s->box_enhanced_slice; for every pixel the
+ * candidate closest to the input sample is kept in dst.
+ *
+ * Strides are counted in `type` elements. maxval normalizes the samples
+ * before filtering. The radius parameter is not read here; the slice worker
+ * takes the radius from the filter context.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) if a work buffer cannot be allocated
+ */
+#define GUIDED_ENHANCED(type, name) \
+static int guided_enhanced_##name(AVFilterContext *ctx, GuidedContext *s, \
+                                  const uint8_t *ssrc, const uint8_t *ssrcRef, \
+                                  uint8_t *ddst, int radius, float eps, int width, int height, \
+                                  int src_stride, int src_ref_stride, int dst_stride, \
+                                  float maxval) \
+{ \
+    int ret = 0; \
+    type *dst = (type *)ddst; \
+    const type *src = (const type *)ssrc; \
+    const type *srcRef = (const type *)ssrcRef; \
+ \
+    int sub = s->sub; \
+    /* subsampled plane size, rounded up */ \
+    int h = (height % sub) == 0 ? height / sub : height / sub + 1; \
+    int w = (width % sub) == 0 ? width / sub : width / sub + 1; \
+ \
+    ThreadData t; \
+    const int nb_threads = ff_filter_get_nb_threads(ctx); \
+    float *I; \
+    float *II; \
+    float *P; \
+    float *IP; \
+    float *meanI; \
+    float *meanII; \
+    float *meanP; \
+    float *meanIP; \
+    float *A; \
+    float *B; \
+    float *meanA; \
+    float *meanB; \
+ \
+    I = av_calloc(w * h, sizeof(float)); \
+    II = av_calloc(w * h, sizeof(float)); \
+    P = av_calloc(w * h, sizeof(float)); \
+    IP = av_calloc(w * h, sizeof(float)); \
+    meanI = av_calloc(w * h, sizeof(float)); \
+    meanII = av_calloc(w * h, sizeof(float)); \
+    meanP = av_calloc(w * h, sizeof(float)); \
+    meanIP = av_calloc(w * h, sizeof(float)); \
+ \
+    A = av_calloc(w * h, sizeof(float)); \
+    B = av_calloc(w * h, sizeof(float)); \
+    meanA = av_calloc(w * h, sizeof(float)); \
+    meanB = av_calloc(w * h, sizeof(float)); \
+ \
+    if (!I || !II || !P || !IP || !meanI || !meanII || !meanP || \
+        !meanIP || !A || !B || !meanA || !meanB) { \
+        ret = AVERROR(ENOMEM); \
+        goto end; \
+    } \
+    /* subsample both inputs and normalize by maxval */ \
+    for (int i = 0;i < h;i++) { \
+        for (int j = 0;j < w;j++) { \
+            int x = i * w + j; \
+            I[x] = src[(i * src_stride + j) * sub] / maxval; \
+            II[x] = I[x] * I[x]; \
+            P[x] = srcRef[(i * src_ref_stride + j) * sub] / maxval; \
+            IP[x] = I[x] * P[x]; \
+        } \
+    } \
+ \
+    for(int modeIdx = 0;modeIdx < NB_SIDEWINDOWMODES;modeIdx++) { \
+        t.width = w; \
+        t.height = h; \
+        t.srcStride = w; \
+        t.dstStride = w; \
+        t.sideWindowMode = modeIdx; \
+        t.src = I; \
+        t.dst = meanI; \
+        ff_filter_execute(ctx, s->box_enhanced_slice, &t, NULL, FFMIN(h, nb_threads)); \
+        t.src = II; \
+        t.dst = meanII; \
+        ff_filter_execute(ctx, s->box_enhanced_slice, &t, NULL, FFMIN(h, nb_threads)); \
+        t.src = P; \
+        t.dst = meanP; \
+        ff_filter_execute(ctx, s->box_enhanced_slice, &t, NULL, FFMIN(h, nb_threads)); \
+        t.src = IP; \
+        t.dst = meanIP; \
+        ff_filter_execute(ctx, s->box_enhanced_slice, &t, NULL, FFMIN(h, nb_threads)); \
+ \
+        /* per-pixel linear coefficients for this side window */ \
+        for (int i = 0;i < h;i++) { \
+            for (int j = 0;j < w;j++) { \
+                int x = i * w + j; \
+                float varI = meanII[x] - (meanI[x] * meanI[x]); \
+                float covIP = meanIP[x] - (meanI[x] * meanP[x]); \
+                A[x] = covIP / (varI + eps); \
+                B[x] = meanP[x] - A[x] * meanI[x]; \
+            } \
+        } \
+ \
+        t.src = A; \
+        t.dst = meanA; \
+        ff_filter_execute(ctx, s->box_enhanced_slice, &t, NULL, FFMIN(h, nb_threads)); \
+        t.src = B; \
+        t.dst = meanB; \
+        ff_filter_execute(ctx, s->box_enhanced_slice, &t, NULL, FFMIN(h, nb_threads)); \
+ \
+        if (modeIdx == 0) { \
+            /* the first window seeds dst unconditionally */ \
+            for (int i = 0;i < height;i++) { \
+                for (int j = 0;j < width;j++) { \
+                    int x = i / sub * w + j / sub; \
+                    dst[i * dst_stride + j] = meanA[x] * src[i * src_stride + j] + \
+                                              meanB[x] * maxval; \
+                } \
+            } \
+        } else { \
+            /* later windows replace dst only when closer to the input */ \
+            for (int i = 0;i < height;i++) { \
+                for (int j = 0;j < width;j++) { \
+                    int x = i / sub * w + j / sub; \
+                    const float in = src[i * src_stride + j]; \
+                    float temp = meanA[x] * in + meanB[x] * maxval; \
+                    /* compare in float: abs() takes an int and would */ \
+                    /* truncate the fractional part of the distance   */ \
+                    if (FFABS(in - temp) < FFABS(in - dst[i * dst_stride + j])) \
+                        dst[i * dst_stride + j] = temp; \
+                } \
+            } \
+        } \
+    } \
+end: \
+    av_freep(&I); \
+    av_freep(&II); \
+    av_freep(&P); \
+    av_freep(&IP); \
+    av_freep(&meanI); \
+    av_freep(&meanII); \
+    av_freep(&meanP); \
+    av_freep(&meanIP); \
+    av_freep(&A); \
+    av_freep(&B); \
+    av_freep(&meanA); \
+    av_freep(&meanB); \
+    return ret; \
+}
+
+GUIDED_ENHANCED(uint8_t, byte)
+GUIDED_ENHANCED(uint16_t, word)
+
static int filter_frame(AVFilterContext *ctx, AVFrame **out, AVFrame *in, AVFrame *ref)
{
GuidedContext *s = ctx->priv;
@@ -321,13 +581,25 @@ static int filter_frame(AVFilterContext *ctx, AVFrame **out, AVFrame *in, AVFram
continue;
}
if (s->depth <= 8)
- guided_byte(ctx, s, in->data[plane], ref->data[plane], (*out)->data[plane], s->radius, s->eps,
- s->planewidth[plane], s->planeheight[plane],
- in->linesize[plane], ref->linesize[plane], (*out)->linesize[plane], (1 << s->depth) - 1.f);
+ if (s->mode == ENHANCED || s->mode == FAST_ENHANCED) {
+ guided_enhanced_byte(ctx, s, in->data[plane], ref->data[plane], (*out)->data[plane], s->radius, s->eps,
+ s->planewidth[plane], s->planeheight[plane],
+ in->linesize[plane], ref->linesize[plane], (*out)->linesize[plane], (1 << s->depth) - 1.f);
+ } else {
+ guided_byte(ctx, s, in->data[plane], ref->data[plane], (*out)->data[plane], s->radius, s->eps,
+ s->planewidth[plane], s->planeheight[plane],
+ in->linesize[plane], ref->linesize[plane], (*out)->linesize[plane], (1 << s->depth) - 1.f);
+ }
else
- guided_word(ctx, s, in->data[plane], ref->data[plane], (*out)->data[plane], s->radius, s->eps,
- s->planewidth[plane], s->planeheight[plane],
- in->linesize[plane] / 2, ref->linesize[plane] / 2, (*out)->linesize[plane] / 2, (1 << s->depth) - 1.f);
+ if (s->mode == ENHANCED || s->mode == FAST_ENHANCED) {
+ guided_enhanced_word(ctx, s, in->data[plane], ref->data[plane], (*out)->data[plane], s->radius, s->eps,
+ s->planewidth[plane], s->planeheight[plane],
+ in->linesize[plane] / 2, ref->linesize[plane] / 2, (*out)->linesize[plane] / 2, (1 << s->depth) - 1.f);
+ } else {
+ guided_word(ctx, s, in->data[plane], ref->data[plane], (*out)->data[plane], s->radius, s->eps,
+ s->planewidth[plane], s->planeheight[plane],
+ in->linesize[plane] / 2, ref->linesize[plane] / 2, (*out)->linesize[plane] / 2, (1 << s->depth) - 1.f);
+ }
}
return 0;
@@ -373,6 +645,11 @@ static int config_output(AVFilterLink *outlink)
}
}
+ if (s->guidance == ON && (s->mode == ENHANCED || s->mode == FAST_ENHANCED)) {
+ av_log(ctx, AV_LOG_ERROR, "Enhanced guided filter is only available when guidance is off.\n");
+ return AVERROR(EINVAL);
+ }
+
outlink->w = mainlink->w;
outlink->h = mainlink->h;
outlink->time_base = mainlink->time_base;
--
1.9.1
More information about the ffmpeg-devel
mailing list