[FFmpeg-devel] [PATCH] libavfilter: Add derain filter init version--GSoC Qualification Task.

Pedro Arthur bygrandao at gmail.com
Tue Apr 9 20:21:06 EEST 2019


Hi,

Em ter, 9 de abr de 2019 às 04:15, <xwmeng at pku.edu.cn> escreveu:
> +@section derain
> +
> +Remove the rain in the input image/video by applying the derain methods based on
> +convolutional neural networks. Supported models:
> +
> +@itemize
> +@item
> +Efficient Sub-Pixel Convolutional Neural Network model (ESPCN).
> +See @url{https://arxiv.org/abs/1609.05158}.
> +@end itemize

As the doc suggests, are you using the ESPCN model for deraining? If
so, it would be more relevant to link to a paper which justifies this
usage, as the doc currently seems to suggest you're using super-resolution.

In case you are the one who is proposing this usage, it is worth at
least giving some justification. Is it better than the current methods
in any way?


> +
> +Training scripts as well as scripts for model generation are provided in
> +the repository at @url{https://github.com/XueweiMeng/derain_filter.git}.
> +
> +The filter accepts the following options:
> +
> +@table @option
> +@item dnn_backend
> +Specify which DNN backend to use for model loading and execution. This option accepts
> +the following values:
> +
> +@table @samp
> +@item native
> +Native implementation of DNN loading and execution.
> +
> +@item tensorflow
> +TensorFlow backend. To enable this backend you
> +need to install the TensorFlow for C library (see
> +@url{https://www.tensorflow.org/install/install_c}) and configure FFmpeg with
> + at code{--enable-libtensorflow}
> +@end table
> +
> +Default value is @samp{native}.
> +
> +@item model
> +Set path to model file specifying network architecture and its parameters.
> +Note that different backends use different file formats. TensorFlow backend
> +can load files for both formats, while native backend can load files for only
> +its format.
> +@end table
> +
>  @section deshake
>
>  Attempt to fix small changes in horizontal and/or vertical shift. This
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index fef6ec5c55..7809bac565 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -194,6 +194,7 @@ OBJS-$(CONFIG_DATASCOPE_FILTER)              += vf_datascope.o
>  OBJS-$(CONFIG_DCTDNOIZ_FILTER)               += vf_dctdnoiz.o
>  OBJS-$(CONFIG_DEBAND_FILTER)                 += vf_deband.o
>  OBJS-$(CONFIG_DEBLOCK_FILTER)                += vf_deblock.o
> +OBJS-$(CONFIG_DERAIN_FILTER)                 += vf_derain.o
>  OBJS-$(CONFIG_DECIMATE_FILTER)               += vf_decimate.o
>  OBJS-$(CONFIG_DECONVOLVE_FILTER)             += vf_convolve.o framesync.o
>  OBJS-$(CONFIG_DEDOT_FILTER)                  += vf_dedot.o
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index c51ae0f3c7..ee2a5b63e6 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -182,6 +182,7 @@ extern AVFilter ff_vf_datascope;
>  extern AVFilter ff_vf_dctdnoiz;
>  extern AVFilter ff_vf_deband;
>  extern AVFilter ff_vf_deblock;
> +extern AVFilter ff_vf_derain;
>  extern AVFilter ff_vf_decimate;
>  extern AVFilter ff_vf_deconvolve;
>  extern AVFilter ff_vf_dedot;
> diff --git a/libavfilter/vf_derain.c b/libavfilter/vf_derain.c
> new file mode 100644
> index 0000000000..f72ae1cd3a
> --- /dev/null
> +++ b/libavfilter/vf_derain.c
> @@ -0,0 +1,204 @@
> +/*
> + * Copyright (c) 2019 Xuewei Meng
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * Filter implementing image derain filter using deep convolutional networks.
> + * https://arxiv.org/abs/1609.05158
> + * http://openaccess.thecvf.com/content_ECCV_2018/html/Xia_Li_Recurrent_Squeeze-and-Excitation_Context_ECCV_2018_paper.html
> + */
> +
> +#include "libavutil/opt.h"
> +#include "libavformat/avio.h"
> +#include "libswscale/swscale.h"
> +#include "avfilter.h"
> +#include "formats.h"
> +#include "internal.h"
> +#include "dnn_interface.h"
> +
> +typedef struct DRContext {
> +    const AVClass *class;
> +
> +    char              *model_filename;
> +    DNNBackendType     backend_type;
> +    DNNModule         *dnn_module;
> +    DNNModel          *model;
> +    DNNData            input;
> +    DNNData            output;
> +} DRContext;
> +
> +#define OFFSET(x) offsetof(DRContext, x)
> +#define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
> +static const AVOption derain_options[] = {
> +    { "dnn_backend", "DNN backend",             OFFSET(backend_type),   AV_OPT_TYPE_FLAGS,  { .i64 = 0 },    0, 1, FLAGS, "backend" },
> +    { "native",      "native backend flag",     0,                      AV_OPT_TYPE_CONST,  { .i64 = 0 },    0, 0, FLAGS, "backend" },
> +#if (CONFIG_LIBTENSORFLOW == 1)
> +    { "tensorflow",  "tensorflow backend flag", 0,                      AV_OPT_TYPE_CONST,  { .i64 = 1 },    0, 0, FLAGS, "backend" },
> +#endif
> +    { "model",       "path to model file",      OFFSET(model_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
> +    { NULL }
> +};
> +
> +AVFILTER_DEFINE_CLASS(derain);
> +
> +static int query_formats(AVFilterContext *ctx)
> +{
> +    AVFilterFormats *formats;
> +    const enum AVPixelFormat pixel_fmts[] = {
> +        AV_PIX_FMT_RGB24,
> +        AV_PIX_FMT_NONE
> +    };
> +
> +    formats = ff_make_format_list(pixel_fmts);
> +    if (!formats) {
> +        av_log(ctx, AV_LOG_ERROR, "could not create formats list\n");
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    return ff_set_common_formats(ctx, formats);
> +}
> +
> +static int config_inputs(AVFilterLink *inlink)
> +{
> +    AVFilterContext *ctx     = inlink->dst;
> +    DRContext *dr_context    = ctx->priv;
> +    AVFilterLink *outlink    = ctx->outputs[0];
> +    DNNReturnType result;
> +
> +    dr_context->input.width    = inlink->w;
> +    dr_context->input.height   = inlink->h;
> +    dr_context->input.channels = 3;
> +
> +    result = (dr_context->model->set_input_output)(dr_context->model->model, &dr_context->input, &dr_context->output);
> +    if (result != DNN_SUCCESS) {
> +        av_log(ctx, AV_LOG_ERROR, "could not set input and output for the model\n");
> +        return AVERROR(EIO);
> +    }
> +
> +    outlink->h = dr_context->output.height;
> +    outlink->w = dr_context->output.width;
> +
> +    return 0;
> +}
> +
> +static int filter_frame(AVFilterLink *inlink, AVFrame *in)
> +{
> +    AVFilterContext *ctx  = inlink->dst;
> +    AVFilterLink *outlink = ctx->outputs[0];
> +    DRContext *dr_context = ctx->priv;
> +    DNNReturnType dnn_result;
> +
> +    AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
> +    if (!out) {
> +        av_log(ctx, AV_LOG_ERROR, "could not allocate memory for output frame\n");
> +        av_frame_free(&in);
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    av_frame_copy_props(out, in);
> +    out->height = dr_context->output.height;
> +    out->width  = dr_context->output.width;
> +
> +    for (int i = 0; i < out->height * out->width * 3; i++) {
> +        dr_context->input.data[i] = in->data[0][i] / 255.0;
> +    }
> +
> +    av_frame_free(&in);
> +    dnn_result = (dr_context->dnn_module->execute_model)(dr_context->model);
> +    if (dnn_result != DNN_SUCCESS){
> +        av_log(ctx, AV_LOG_ERROR, "failed to execute model\n");
> +        return AVERROR(EIO);
> +    }
> +
> +    for (int i = 0; i < out->height * out->width * 3; i++) {
> +        out->data[0][i] = (int)(dr_context->output.data[i] * 255);
> +    }
> +
> +    return ff_filter_frame(outlink, out);
> +}
> +
> +static av_cold int init(AVFilterContext *ctx)
> +{
> +    DRContext *dr_context = ctx->priv;
> +
> +    dr_context->dnn_module = ff_get_dnn_module(dr_context->backend_type);
> +    if (!dr_context->dnn_module) {
> +        av_log(ctx, AV_LOG_ERROR, "could not create DNN module for requested backend\n");
> +        return AVERROR(ENOMEM);
> +    }
> +    if (!dr_context->model_filename) {
> +        av_log(ctx, AV_LOG_ERROR, "model file for network is not specified\n");
> +        return AVERROR(EINVAL);
> +    }
> +    if (!dr_context->dnn_module->load_model) {
> +        av_log(ctx, AV_LOG_ERROR, "load_model for network is not specified\n");
> +        return AVERROR(EINVAL);
> +    }
> +
> +    dr_context->model = (dr_context->dnn_module->load_model)(dr_context->model_filename);
> +    if (!dr_context->model) {
> +        av_log(ctx, AV_LOG_ERROR, "could not load DNN model\n");
> +        return AVERROR(EINVAL);
> +    }
> +
> +    return 0;
> +}
> +
> +static av_cold void uninit(AVFilterContext *ctx)
> +{
> +    DRContext *dr_context = ctx->priv;
> +
> +    if (dr_context->dnn_module) {
> +        (dr_context->dnn_module->free_model)(&dr_context->model);
> +        av_freep(&dr_context->dnn_module);
> +    }
> +}
> +
> +static const AVFilterPad derain_inputs[] = {
> +    {
> +        .name         = "default",
> +        .type         = AVMEDIA_TYPE_VIDEO,
> +        .config_props = config_inputs,
> +        .filter_frame = filter_frame,
> +    },
> +    { NULL }
> +};
> +
> +static const AVFilterPad derain_outputs[] = {
> +    {
> +        .name = "default",
> +        .type = AVMEDIA_TYPE_VIDEO,
> +    },
> +    { NULL }
> +};
> +
> +AVFilter ff_vf_derain = {
> +    .name          = "derain",
> +    .description   = NULL_IF_CONFIG_SMALL("Apply derain filter to the input."),
> +    .priv_size     = sizeof(DRContext),
> +    .init          = init,
> +    .uninit        = uninit,
> +    .query_formats = query_formats,
> +    .inputs        = derain_inputs,
> +    .outputs       = derain_outputs,
> +    .priv_class    = &derain_class,
> +    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
> +};
> +
> --
> 2.17.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".


More information about the ffmpeg-devel mailing list