FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
vf_srcnn.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018 Sergey Lavrushkin
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * Filter implementing image super-resolution using deep convolutional networks.
24  * https://arxiv.org/abs/1501.00092
25  */
26 
27 #include "avfilter.h"
28 #include "formats.h"
29 #include "internal.h"
30 #include "libavutil/opt.h"
31 #include "libavformat/avio.h"
32 #include "dnn_interface.h"
33 
34 typedef struct SRCNNContext {
35  const AVClass *class;
36 
43 } SRCNNContext;
44 
45 #define OFFSET(x) offsetof(SRCNNContext, x)
46 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
47 static const AVOption srcnn_options[] = {
48  { "dnn_backend", "DNN backend used for model execution", OFFSET(backend_type), AV_OPT_TYPE_FLAGS, { .i64 = 0 }, 0, 1, FLAGS, "backend" },
49  { "native", "native backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "backend" },
50 #if (CONFIG_LIBTENSORFLOW == 1)
51  { "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "backend" },
52 #endif
53  { "model_filename", "path to model file specifying network architecture and its parameters", OFFSET(model_filename), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
54  { NULL }
55 };
56 
58 
59 static av_cold int init(AVFilterContext* context)
60 {
61  SRCNNContext* srcnn_context = context->priv;
62 
63  srcnn_context->dnn_module = ff_get_dnn_module(srcnn_context->backend_type);
64  if (!srcnn_context->dnn_module){
65  av_log(context, AV_LOG_ERROR, "could not create DNN module for requested backend\n");
66  return AVERROR(ENOMEM);
67  }
68  if (!srcnn_context->model_filename){
69  av_log(context, AV_LOG_VERBOSE, "model file for network was not specified, using default network for x2 upsampling\n");
70  srcnn_context->model = (srcnn_context->dnn_module->load_default_model)(DNN_SRCNN);
71  }
72  else{
73  srcnn_context->model = (srcnn_context->dnn_module->load_model)(srcnn_context->model_filename);
74  }
75  if (!srcnn_context->model){
76  av_log(context, AV_LOG_ERROR, "could not load DNN model\n");
77  return AVERROR(EIO);
78  }
79 
80  return 0;
81 }
82 
83 static int query_formats(AVFilterContext* context)
84 {
88  AVFilterFormats* formats_list;
89 
90  formats_list = ff_make_format_list(pixel_formats);
91  if (!formats_list){
92  av_log(context, AV_LOG_ERROR, "could not create formats list\n");
93  return AVERROR(ENOMEM);
94  }
95  return ff_set_common_formats(context, formats_list);
96 }
97 
98 static int config_props(AVFilterLink* inlink)
99 {
100  AVFilterContext* context = inlink->dst;
101  SRCNNContext* srcnn_context = context->priv;
102  DNNReturnType result;
103 
104  srcnn_context->input_output_buf = av_malloc(inlink->h * inlink->w * sizeof(float));
105  if (!srcnn_context->input_output_buf){
106  av_log(context, AV_LOG_ERROR, "could not allocate memory for input/output buffer\n");
107  return AVERROR(ENOMEM);
108  }
109 
110  srcnn_context->input_output.data = srcnn_context->input_output_buf;
111  srcnn_context->input_output.width = inlink->w;
112  srcnn_context->input_output.height = inlink->h;
113  srcnn_context->input_output.channels = 1;
114 
115  result = (srcnn_context->model->set_input_output)(srcnn_context->model->model, &srcnn_context->input_output, &srcnn_context->input_output);
116  if (result != DNN_SUCCESS){
117  av_log(context, AV_LOG_ERROR, "could not set input and output for the model\n");
118  return AVERROR(EIO);
119  }
120  else{
121  return 0;
122  }
123 }
124 
/**
 * Arguments shared by the slice-threaded conversion jobs
 * uint8_to_float() and float_to_uint8().
 */
typedef struct ThreadData{
    uint8_t* out;                    /* first byte of the 8-bit plane being converted */
    int out_linesize, height, width; /* byte stride, row count and samples per row of that plane */
} ThreadData;
129 
130 static int uint8_to_float(AVFilterContext* context, void* arg, int jobnr, int nb_jobs)
131 {
132  SRCNNContext* srcnn_context = context->priv;
133  const ThreadData* td = arg;
134  const int slice_start = (td->height * jobnr ) / nb_jobs;
135  const int slice_end = (td->height * (jobnr + 1)) / nb_jobs;
136  const uint8_t* src = td->out + slice_start * td->out_linesize;
137  float* dst = srcnn_context->input_output_buf + slice_start * td->width;
138  int y, x;
139 
140  for (y = slice_start; y < slice_end; ++y){
141  for (x = 0; x < td->width; ++x){
142  dst[x] = (float)src[x] / 255.0f;
143  }
144  src += td->out_linesize;
145  dst += td->width;
146  }
147 
148  return 0;
149 }
150 
151 static int float_to_uint8(AVFilterContext* context, void* arg, int jobnr, int nb_jobs)
152 {
153  SRCNNContext* srcnn_context = context->priv;
154  const ThreadData* td = arg;
155  const int slice_start = (td->height * jobnr ) / nb_jobs;
156  const int slice_end = (td->height * (jobnr + 1)) / nb_jobs;
157  const float* src = srcnn_context->input_output_buf + slice_start * td->width;
158  uint8_t* dst = td->out + slice_start * td->out_linesize;
159  int y, x;
160 
161  for (y = slice_start; y < slice_end; ++y){
162  for (x = 0; x < td->width; ++x){
163  dst[x] = (uint8_t)(255.0f * FFMIN(src[x], 1.0f));
164  }
165  src += td->width;
166  dst += td->out_linesize;
167  }
168 
169  return 0;
170 }
171 
172 static int filter_frame(AVFilterLink* inlink, AVFrame* in)
173 {
174  AVFilterContext* context = inlink->dst;
175  SRCNNContext* srcnn_context = context->priv;
176  AVFilterLink* outlink = context->outputs[0];
177  AVFrame* out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
178  ThreadData td;
179  int nb_threads;
180  DNNReturnType dnn_result;
181 
182  if (!out){
183  av_log(context, AV_LOG_ERROR, "could not allocate memory for output frame\n");
184  av_frame_free(&in);
185  return AVERROR(ENOMEM);
186  }
187  av_frame_copy_props(out, in);
188  av_frame_copy(out, in);
189  av_frame_free(&in);
190  td.out = out->data[0];
191  td.out_linesize = out->linesize[0];
192  td.height = out->height;
193  td.width = out->width;
194 
195  nb_threads = ff_filter_get_nb_threads(context);
196  context->internal->execute(context, uint8_to_float, &td, NULL, FFMIN(td.height, nb_threads));
197 
198  dnn_result = (srcnn_context->dnn_module->execute_model)(srcnn_context->model);
199  if (dnn_result != DNN_SUCCESS){
200  av_log(context, AV_LOG_ERROR, "failed to execute loaded model\n");
201  return AVERROR(EIO);
202  }
203 
204  context->internal->execute(context, float_to_uint8, &td, NULL, FFMIN(td.height, nb_threads));
205 
206  return ff_filter_frame(outlink, out);
207 }
208 
209 static av_cold void uninit(AVFilterContext* context)
210 {
211  SRCNNContext* srcnn_context = context->priv;
212 
213  if (srcnn_context->dnn_module){
214  (srcnn_context->dnn_module->free_model)(&srcnn_context->model);
215  av_freep(&srcnn_context->dnn_module);
216  }
217  av_freep(&srcnn_context->input_output_buf);
218 }
219 
220 static const AVFilterPad srcnn_inputs[] = {
221  {
222  .name = "default",
223  .type = AVMEDIA_TYPE_VIDEO,
224  .config_props = config_props,
225  .filter_frame = filter_frame,
226  },
227  { NULL }
228 };
229 
230 static const AVFilterPad srcnn_outputs[] = {
231  {
232  .name = "default",
233  .type = AVMEDIA_TYPE_VIDEO,
234  },
235  { NULL }
236 };
237 
239  .name = "srcnn",
240  .description = NULL_IF_CONFIG_SMALL("Apply super resolution convolutional neural network to the input. Use bicubic upsamping with corresponding scaling factor before."),
241  .priv_size = sizeof(SRCNNContext),
242  .init = init,
243  .uninit = uninit,
245  .inputs = srcnn_inputs,
246  .outputs = srcnn_outputs,
247  .priv_class = &srcnn_class,
249 };
250 
void * model
Definition: dnn_interface.h:42
#define NULL
Definition: coverity.c:32
AVFrame * out
Definition: af_adeclick.c:485
Buffered I/O operations.
static int float_to_uint8(AVFilterContext *context, void *arg, int jobnr, int nb_jobs)
Definition: vf_srcnn.c:151
AVFilter ff_vf_srcnn
Definition: vf_srcnn.c:238
This structure describes decoded (raw) audio or video data.
Definition: frame.h:218
AVOption.
Definition: opt.h:246
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:67
static const AVFilterPad srcnn_inputs[]
Definition: vf_srcnn.c:220
Main libavfilter public API header.
int channels
Definition: dnn_interface.h:37
DNNData input_output
Definition: vf_srcnn.c:42
float * input_output_buf
Definition: vf_srcnn.c:38
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
Definition: video.c:99
#define src
Definition: vp8dsp.c:254
AVFilterFormats * ff_make_format_list(const int *fmts)
Create a list of supported formats.
Definition: formats.c:283
uint8_t * out
Definition: vf_srcnn.c:126
#define AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC
Some filters support a generic "enable" expression option that can be used to enable or disable a fil...
Definition: avfilter.h:125
const char * name
Pad name.
Definition: internal.h:60
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1080
uint8_t
#define av_cold
Definition: attributes.h:82
#define av_malloc(s)
AVOptions.
#define f(width, name)
Definition: cbs_vp9.c:255
int height
Definition: vf_avgblur.c:61
ptrdiff_t out_linesize[3]
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:192
DNNBackendType
Definition: dnn_interface.h:31
#define flags(name, subs,...)
Definition: cbs_h2645.c:263
#define av_log(a,...)
A filter pad used for either input or output.
Definition: internal.h:54
static int uint8_to_float(AVFilterContext *context, void *arg, int jobnr, int nb_jobs)
Definition: vf_srcnn.c:130
int width
Definition: frame.h:276
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
int ff_set_common_formats(AVFilterContext *ctx, AVFilterFormats *formats)
A helper for query_formats() which sets all links to the same list of formats.
Definition: formats.c:568
#define td
Definition: regdef.h:70
int height
Definition: dnn_interface.h:37
#define AVERROR(e)
Definition: error.h:43
AVFILTER_DEFINE_CLASS(srcnn)
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:202
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:186
void * priv
private data for use by the filter
Definition: avfilter.h:353
DNNReturnType(* execute_model)(const DNNModel *model)
Definition: dnn_interface.h:55
#define AVFILTER_FLAG_SLICE_THREADS
The filter supports multithreading by splitting frames into multiple parts and processing them concur...
Definition: avfilter.h:116
#define OFFSET(x)
Definition: vf_srcnn.c:45
const char * arg
Definition: jacosubdec.c:66
DNNModel * model
Definition: vf_srcnn.c:41
int av_frame_copy(AVFrame *dst, const AVFrame *src)
Copy the frame data from src to dst.
Definition: frame.c:790
#define FLAGS
Definition: vf_srcnn.c:46
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:66
DNNModule * dnn_module
Definition: vf_srcnn.c:40
static av_cold void uninit(AVFilterContext *context)
Definition: vf_srcnn.c:209
int ff_filter_get_nb_threads(AVFilterContext *ctx)
Get number of threads for current filter instance.
Definition: avfilter.c:802
DNNModel *(* load_model)(const char *model_filename)
Definition: dnn_interface.h:51
#define FFMIN(a, b)
Definition: common.h:96
int out_linesize
Definition: vf_srcnn.c:127
static av_cold int init(AVFilterContext *context)
Definition: vf_srcnn.c:59
DNN inference engine interface.
static const AVFilterPad inputs[]
Definition: af_acontrast.c:193
DNNReturnType
Definition: dnn_interface.h:29
static const AVFilterPad outputs[]
Definition: af_acontrast.c:203
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
Definition: frame.h:249
static const AVFilterPad srcnn_outputs[]
Definition: vf_srcnn.c:230
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(constint16_t *) pi >>8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(constint32_t *) pi >>24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(constfloat *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(constfloat *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(constfloat *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(constdouble *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(constdouble *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(constdouble *) pi *(1U<< 31))))#defineSET_CONV_FUNC_GROUP(ofmt, ifmt) staticvoidset_generic_function(AudioConvert *ac){}voidff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enumAVSampleFormatout_fmt, enumAVSampleFormatin_fmt, intchannels, intsample_rate, 
intapply_map){AudioConvert *ac;intin_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) returnNULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt)>2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);returnNULL;}returnac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}elseif(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;elseac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);returnac;}intff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){intuse_generic=1;intlen=in->nb_samples;intp;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%dsamples-audio_convert:%sto%s(dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));returnff_convert_dither(ac-> in
planar YUV 4:1:0, 9bpp, (1 Cr & Cb sample per 4x4 Y samples)
Definition: pixfmt.h:68
Describe the class of an AVClass context structure.
Definition: log.h:67
Filter definition.
Definition: avfilter.h:144
const char * name
Filter name.
Definition: avfilter.h:148
static const AVOption srcnn_options[]
Definition: vf_srcnn.c:47
AVFilterLink ** outputs
array of pointers to output links
Definition: avfilter.h:350
char * model_filename
Definition: vf_srcnn.c:37
DNNBackendType backend_type
Definition: vf_srcnn.c:39
DNNModule * ff_get_dnn_module(DNNBackendType backend_type)
Definition: dnn_interface.c:31
void(* free_model)(DNNModel **model)
Definition: dnn_interface.h:57
AVFilterInternal * internal
An opaque struct for libavfilter internal use.
Definition: avfilter.h:378
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:232
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:62
Y , 8bpp.
Definition: pixfmt.h:70
static int config_props(AVFilterLink *inlink)
Definition: vf_srcnn.c:98
static int query_formats(AVFilterContext *context)
Definition: vf_srcnn.c:83
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
Definition: pixfmt.h:69
avfilter_execute_func * execute
Definition: internal.h:155
static int slice_end(AVCodecContext *avctx, AVFrame *pict)
Handle slice ends.
Definition: mpeg12dec.c:2029
float * data
Definition: dnn_interface.h:36
A list of supported formats for one end of a filter link.
Definition: formats.h:64
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
Definition: vf_srcnn.c:172
An instance of a filter.
Definition: avfilter.h:338
int height
Definition: frame.h:276
FILE * out
Definition: movenc.c:54
#define av_freep(p)
internal API functions
DNNModel *(* load_default_model)(DNNDefaultModel model_type)
Definition: dnn_interface.h:53
DNNReturnType(* set_input_output)(void *model, const DNNData *input, const DNNData *output)
Definition: dnn_interface.h:45
AVPixelFormat
Pixel format.
Definition: pixfmt.h:60
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:652