FFmpeg
vf_dnn_processing.c
/*
 * Copyright (c) 2019 Guo Yejun
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Implements a generic image processing filter using deep learning networks.
 */

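/*
 * Typical usage from the command line looks like the following; the model
 * file and the tensor names "x" and "y" are hypothetical examples, since
 * they depend entirely on the model being loaded:
 *
 *   ffmpeg -i in.png \
 *          -vf dnn_processing=dnn_backend=tensorflow:model=srcnn.pb:input=x:output=y \
 *          out.png
 *
 * The incoming frame must already have the width and height the model
 * expects; the design is to add an explicit scale filter before this one
 * when resizing is needed (see check_modelinput_inlink() below).
 */
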
#include "libavformat/avio.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "libavutil/avassert.h"
#include "libavutil/imgutils.h"
#include "avfilter.h"
#include "dnn_interface.h"
#include "formats.h"
#include "internal.h"
#include "libswscale/swscale.h"

typedef struct DnnProcessingContext {
    const AVClass *class;

    char *model_filename;
    DNNBackendType backend_type;
    char *model_inputname;
    char *model_outputname;

    DNNModule *dnn_module;
    DNNModel *model;

    // input & output of the model at execution time
    DNNData input;
    DNNData output;

    struct SwsContext *sws_gray8_to_grayf32;
    struct SwsContext *sws_grayf32_to_gray8;
    struct SwsContext *sws_uv_scale;
    int sws_uv_height;
} DnnProcessingContext;

#define OFFSET(x) offsetof(DnnProcessingContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
static const AVOption dnn_processing_options[] = {
    { "dnn_backend", "DNN backend",              OFFSET(backend_type),     AV_OPT_TYPE_INT,    { .i64 = 0 },    0, 1, FLAGS, "backend" },
    { "native",      "native backend flag",      0,                        AV_OPT_TYPE_CONST,  { .i64 = 0 },    0, 0, FLAGS, "backend" },
#if (CONFIG_LIBTENSORFLOW == 1)
    { "tensorflow",  "tensorflow backend flag",  0,                        AV_OPT_TYPE_CONST,  { .i64 = 1 },    0, 0, FLAGS, "backend" },
#endif
    { "model",       "path to model file",       OFFSET(model_filename),   AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
    { "input",       "input name of the model",  OFFSET(model_inputname),  AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
    { "output",      "output name of the model", OFFSET(model_outputname), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
    { NULL }
};

AVFILTER_DEFINE_CLASS(dnn_processing);

static av_cold int init(AVFilterContext *context)
{
    DnnProcessingContext *ctx = context->priv;

    if (!ctx->model_filename) {
        av_log(ctx, AV_LOG_ERROR, "model file for network is not specified\n");
        return AVERROR(EINVAL);
    }
    if (!ctx->model_inputname) {
        av_log(ctx, AV_LOG_ERROR, "input name of the model network is not specified\n");
        return AVERROR(EINVAL);
    }
    if (!ctx->model_outputname) {
        av_log(ctx, AV_LOG_ERROR, "output name of the model network is not specified\n");
        return AVERROR(EINVAL);
    }

    ctx->dnn_module = ff_get_dnn_module(ctx->backend_type);
    if (!ctx->dnn_module) {
        av_log(ctx, AV_LOG_ERROR, "could not create DNN module for requested backend\n");
        return AVERROR(ENOMEM);
    }
    if (!ctx->dnn_module->load_model) {
        av_log(ctx, AV_LOG_ERROR, "load_model for network is not specified\n");
        return AVERROR(EINVAL);
    }

    ctx->model = (ctx->dnn_module->load_model)(ctx->model_filename);
    if (!ctx->model) {
        av_log(ctx, AV_LOG_ERROR, "could not load DNN model\n");
        return AVERROR(EINVAL);
    }

    return 0;
}

static int query_formats(AVFilterContext *context)
{
    static const enum AVPixelFormat pix_fmts[] = {
        AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24,
        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAYF32,
        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV410P,
        AV_PIX_FMT_YUV411P,
        AV_PIX_FMT_NONE
    };
    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
    return ff_set_common_formats(context, fmts_list);
}

#define LOG_FORMAT_CHANNEL_MISMATCH()                       \
    av_log(ctx, AV_LOG_ERROR,                               \
           "the frame's format %s does not match "          \
           "the model input channel %d\n",                  \
           av_get_pix_fmt_name(fmt),                        \
           model_input->channels);

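/* Verify that the incoming link is compatible with the model input:
 * matching width/height where the model fixes them, and a channel count
 * and data type consistent with the frame's pixel format. */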
static int check_modelinput_inlink(const DNNData *model_input, const AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    enum AVPixelFormat fmt = inlink->format;

    // the design is to add an explicit scale filter before this filter
    if (model_input->height != -1 && model_input->height != inlink->h) {
        av_log(ctx, AV_LOG_ERROR, "the model requires frame height %d but got %d\n",
               model_input->height, inlink->h);
        return AVERROR(EIO);
    }
    if (model_input->width != -1 && model_input->width != inlink->w) {
        av_log(ctx, AV_LOG_ERROR, "the model requires frame width %d but got %d\n",
               model_input->width, inlink->w);
        return AVERROR(EIO);
    }

    switch (fmt) {
    case AV_PIX_FMT_RGB24:
    case AV_PIX_FMT_BGR24:
        if (model_input->channels != 3) {
            LOG_FORMAT_CHANNEL_MISMATCH();
            return AVERROR(EIO);
        }
        if (model_input->dt != DNN_FLOAT && model_input->dt != DNN_UINT8) {
            av_log(ctx, AV_LOG_ERROR, "only supports DNN models with input data type float32 or uint8.\n");
            return AVERROR(EIO);
        }
        return 0;
    case AV_PIX_FMT_GRAY8:
        if (model_input->channels != 1) {
            LOG_FORMAT_CHANNEL_MISMATCH();
            return AVERROR(EIO);
        }
        if (model_input->dt != DNN_UINT8) {
            av_log(ctx, AV_LOG_ERROR, "only supports DNN models with input data type uint8.\n");
            return AVERROR(EIO);
        }
        return 0;
    case AV_PIX_FMT_GRAYF32:
    case AV_PIX_FMT_YUV420P:
    case AV_PIX_FMT_YUV422P:
    case AV_PIX_FMT_YUV444P:
    case AV_PIX_FMT_YUV410P:
    case AV_PIX_FMT_YUV411P:
        if (model_input->channels != 1) {
            LOG_FORMAT_CHANNEL_MISMATCH();
            return AVERROR(EIO);
        }
        if (model_input->dt != DNN_FLOAT) {
            av_log(ctx, AV_LOG_ERROR, "only supports DNN models with input data type float32.\n");
            return AVERROR(EIO);
        }
        return 0;
    default:
        av_log(ctx, AV_LOG_ERROR, "%s not supported.\n", av_get_pix_fmt_name(fmt));
        return AVERROR(EIO);
    }
}

static int config_input(AVFilterLink *inlink)
{
    AVFilterContext *context = inlink->dst;
    DnnProcessingContext *ctx = context->priv;
    DNNReturnType result;
    DNNData model_input;
    int check;

    result = ctx->model->get_input(ctx->model->model, &model_input, ctx->model_inputname);
    if (result != DNN_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "could not get input from the model\n");
        return AVERROR(EIO);
    }

    check = check_modelinput_inlink(&model_input, inlink);
    if (check != 0) {
        return check;
    }

    ctx->input.width    = inlink->w;
    ctx->input.height   = inlink->h;
    ctx->input.channels = model_input.channels;
    ctx->input.dt       = model_input.dt;

    result = (ctx->model->set_input_output)(ctx->model->model,
                                            &ctx->input, ctx->model_inputname,
                                            (const char **)&ctx->model_outputname, 1);
    if (result != DNN_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "could not set input and output for the model\n");
        return AVERROR(EIO);
    }

    return 0;
}

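/* Set up the swscale contexts used for uint8 <-> float32 conversion. For
 * packed RGB24/BGR24 each row is treated as a single gray plane three
 * times the frame width, so one context converts all three channels at
 * once; for planar YUV only the Y plane is converted, and a separate
 * bicubic context rescales U/V when the model changes the frame size. */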
static int prepare_sws_context(AVFilterLink *outlink)
{
    AVFilterContext *context = outlink->src;
    DnnProcessingContext *ctx = context->priv;
    AVFilterLink *inlink = context->inputs[0];
    enum AVPixelFormat fmt = inlink->format;
    DNNDataType input_dt  = ctx->input.dt;
    DNNDataType output_dt = ctx->output.dt;

    switch (fmt) {
    case AV_PIX_FMT_RGB24:
    case AV_PIX_FMT_BGR24:
        if (input_dt == DNN_FLOAT) {
            ctx->sws_gray8_to_grayf32 = sws_getContext(inlink->w * 3,
                                                       inlink->h,
                                                       AV_PIX_FMT_GRAY8,
                                                       inlink->w * 3,
                                                       inlink->h,
                                                       AV_PIX_FMT_GRAYF32,
                                                       0, NULL, NULL, NULL);
        }
        if (output_dt == DNN_FLOAT) {
            ctx->sws_grayf32_to_gray8 = sws_getContext(outlink->w * 3,
                                                       outlink->h,
                                                       AV_PIX_FMT_GRAYF32,
                                                       outlink->w * 3,
                                                       outlink->h,
                                                       AV_PIX_FMT_GRAY8,
                                                       0, NULL, NULL, NULL);
        }
        return 0;
    case AV_PIX_FMT_YUV420P:
    case AV_PIX_FMT_YUV422P:
    case AV_PIX_FMT_YUV444P:
    case AV_PIX_FMT_YUV410P:
    case AV_PIX_FMT_YUV411P:
        av_assert0(input_dt == DNN_FLOAT);
        av_assert0(output_dt == DNN_FLOAT);
        ctx->sws_gray8_to_grayf32 = sws_getContext(inlink->w,
                                                   inlink->h,
                                                   AV_PIX_FMT_GRAY8,
                                                   inlink->w,
                                                   inlink->h,
                                                   AV_PIX_FMT_GRAYF32,
                                                   0, NULL, NULL, NULL);
        ctx->sws_grayf32_to_gray8 = sws_getContext(outlink->w,
                                                   outlink->h,
                                                   AV_PIX_FMT_GRAYF32,
                                                   outlink->w,
                                                   outlink->h,
                                                   AV_PIX_FMT_GRAY8,
                                                   0, NULL, NULL, NULL);

        if (inlink->w != outlink->w || inlink->h != outlink->h) {
            const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
            int sws_src_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
            int sws_src_w = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
            int sws_dst_h = AV_CEIL_RSHIFT(outlink->h, desc->log2_chroma_h);
            int sws_dst_w = AV_CEIL_RSHIFT(outlink->w, desc->log2_chroma_w);
            ctx->sws_uv_scale = sws_getContext(sws_src_w, sws_src_h, AV_PIX_FMT_GRAY8,
                                               sws_dst_w, sws_dst_h, AV_PIX_FMT_GRAY8,
                                               SWS_BICUBIC, NULL, NULL, NULL);
            ctx->sws_uv_height = sws_src_h;
        }
        return 0;
    default:
        // nothing to prepare for the remaining formats
        break;
    }

    return 0;
}

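/* config_output does a trial run of the model so that a model which
 * resizes the frame (a super-resolution network, for example) reports its
 * real output dimensions before the first frame is filtered. */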
static int config_output(AVFilterLink *outlink)
{
    AVFilterContext *context = outlink->src;
    DnnProcessingContext *ctx = context->priv;
    DNNReturnType result;

    // do a trial run in case the DNN model resizes the frame
    result = (ctx->dnn_module->execute_model)(ctx->model, &ctx->output, 1);
    if (result != DNN_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "failed to execute model\n");
        return AVERROR(EIO);
    }

    outlink->w = ctx->output.width;
    outlink->h = ctx->output.height;

    prepare_sws_context(outlink);

    return 0;
}

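/* Copy the frame's pixel data into the model input buffer, converting
 * uint8 samples to float32 through swscale where the model expects
 * floats. */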
static int copy_from_frame_to_dnn(DnnProcessingContext *ctx, const AVFrame *frame)
{
    int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);
    DNNData *dnn_input = &ctx->input;

    switch (frame->format) {
    case AV_PIX_FMT_RGB24:
    case AV_PIX_FMT_BGR24:
        if (dnn_input->dt == DNN_FLOAT) {
            sws_scale(ctx->sws_gray8_to_grayf32, (const uint8_t **)frame->data, frame->linesize,
                      0, frame->height, (uint8_t * const *)(&dnn_input->data),
                      (const int [4]){frame->width * 3 * sizeof(float), 0, 0, 0});
        } else {
            av_assert0(dnn_input->dt == DNN_UINT8);
            av_image_copy_plane(dnn_input->data, bytewidth,
                                frame->data[0], frame->linesize[0],
                                bytewidth, frame->height);
        }
        return 0;
    case AV_PIX_FMT_GRAY8:
    case AV_PIX_FMT_GRAYF32:
        av_image_copy_plane(dnn_input->data, bytewidth,
                            frame->data[0], frame->linesize[0],
                            bytewidth, frame->height);
        return 0;
    case AV_PIX_FMT_YUV420P:
    case AV_PIX_FMT_YUV422P:
    case AV_PIX_FMT_YUV444P:
    case AV_PIX_FMT_YUV410P:
    case AV_PIX_FMT_YUV411P:
        sws_scale(ctx->sws_gray8_to_grayf32, (const uint8_t **)frame->data, frame->linesize,
                  0, frame->height, (uint8_t * const *)(&dnn_input->data),
                  (const int [4]){frame->width * sizeof(float), 0, 0, 0});
        return 0;
    default:
        return AVERROR(EIO);
    }
}

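/* Copy the model output buffer back into the frame, converting float32
 * samples back to uint8 through swscale where needed. */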
static int copy_from_dnn_to_frame(DnnProcessingContext *ctx, AVFrame *frame)
{
    int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);
    DNNData *dnn_output = &ctx->output;

    switch (frame->format) {
    case AV_PIX_FMT_RGB24:
    case AV_PIX_FMT_BGR24:
        if (dnn_output->dt == DNN_FLOAT) {
            sws_scale(ctx->sws_grayf32_to_gray8, (const uint8_t *[4]){(const uint8_t *)dnn_output->data, 0, 0, 0},
                      (const int [4]){frame->width * 3 * sizeof(float), 0, 0, 0},
                      0, frame->height, (uint8_t * const *)frame->data, frame->linesize);
        } else {
            av_assert0(dnn_output->dt == DNN_UINT8);
            av_image_copy_plane(frame->data[0], frame->linesize[0],
                                dnn_output->data, bytewidth,
                                bytewidth, frame->height);
        }
        return 0;
    case AV_PIX_FMT_GRAY8:
        // it is possible that the data type of the dnn output is float32;
        // support for that case can be added when needed.
        av_assert0(dnn_output->dt == DNN_UINT8);
        av_image_copy_plane(frame->data[0], frame->linesize[0],
                            dnn_output->data, bytewidth,
                            bytewidth, frame->height);
        return 0;
    case AV_PIX_FMT_GRAYF32:
        av_assert0(dnn_output->dt == DNN_FLOAT);
        av_image_copy_plane(frame->data[0], frame->linesize[0],
                            dnn_output->data, bytewidth,
                            bytewidth, frame->height);
        return 0;
    case AV_PIX_FMT_YUV420P:
    case AV_PIX_FMT_YUV422P:
    case AV_PIX_FMT_YUV444P:
    case AV_PIX_FMT_YUV410P:
    case AV_PIX_FMT_YUV411P:
        sws_scale(ctx->sws_grayf32_to_gray8, (const uint8_t *[4]){(const uint8_t *)dnn_output->data, 0, 0, 0},
                  (const int [4]){frame->width * sizeof(float), 0, 0, 0},
                  0, frame->height, (uint8_t * const *)frame->data, frame->linesize);
        return 0;
    default:
        return AVERROR(EIO);
    }
}

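/* True for any non-RGB format with three components, i.e. the planar YUV
 * formats this filter accepts. */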
static av_always_inline int isPlanarYUV(enum AVPixelFormat pix_fmt)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
    av_assert0(desc);
    return !(desc->flags & AV_PIX_FMT_FLAG_RGB) && desc->nb_components == 3;
}

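/* The model processes only the Y plane of planar YUV input; the U and V
 * planes are carried over from the input frame, either copied verbatim or
 * bicubically rescaled when the model changed the frame dimensions. */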
static int copy_uv_planes(DnnProcessingContext *ctx, AVFrame *out, const AVFrame *in)
{
    const AVPixFmtDescriptor *desc;
    int uv_height;

    if (!ctx->sws_uv_scale) {
        av_assert0(in->height == out->height && in->width == out->width);
        desc = av_pix_fmt_desc_get(in->format);
        uv_height = AV_CEIL_RSHIFT(in->height, desc->log2_chroma_h);
        for (int i = 1; i < 3; ++i) {
            int bytewidth = av_image_get_linesize(in->format, in->width, i);
            av_image_copy_plane(out->data[i], out->linesize[i],
                                in->data[i], in->linesize[i],
                                bytewidth, uv_height);
        }
    } else {
        sws_scale(ctx->sws_uv_scale, (const uint8_t **)(in->data + 1), in->linesize + 1,
                  0, ctx->sws_uv_height, out->data + 1, out->linesize + 1);
        sws_scale(ctx->sws_uv_scale, (const uint8_t **)(in->data + 2), in->linesize + 2,
                  0, ctx->sws_uv_height, out->data + 2, out->linesize + 2);
    }

    return 0;
}

static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *context = inlink->dst;
    AVFilterLink *outlink = context->outputs[0];
    DnnProcessingContext *ctx = context->priv;
    DNNReturnType dnn_result;
    AVFrame *out;

    copy_from_frame_to_dnn(ctx, in);

    dnn_result = (ctx->dnn_module->execute_model)(ctx->model, &ctx->output, 1);
    if (dnn_result != DNN_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "failed to execute model\n");
        av_frame_free(&in);
        return AVERROR(EIO);
    }

    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
    if (!out) {
        av_frame_free(&in);
        return AVERROR(ENOMEM);
    }

    av_frame_copy_props(out, in);
    copy_from_dnn_to_frame(ctx, out);

    if (isPlanarYUV(in->format))
        copy_uv_planes(ctx, out, in);

    av_frame_free(&in);
    return ff_filter_frame(outlink, out);
}

static av_cold void uninit(AVFilterContext *ctx)
{
    DnnProcessingContext *context = ctx->priv;

    sws_freeContext(context->sws_gray8_to_grayf32);
    sws_freeContext(context->sws_grayf32_to_gray8);
    sws_freeContext(context->sws_uv_scale);

    if (context->dnn_module)
        (context->dnn_module->free_model)(&context->model);

    av_freep(&context->dnn_module);
}

static const AVFilterPad dnn_processing_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = config_input,
        .filter_frame = filter_frame,
    },
    { NULL }
};

static const AVFilterPad dnn_processing_outputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = config_output,
    },
    { NULL }
};

AVFilter ff_vf_dnn_processing = {
    .name          = "dnn_processing",
    .description   = NULL_IF_CONFIG_SMALL("Apply DNN processing filter to the input."),
    .priv_size     = sizeof(DnnProcessingContext),
    .init          = init,
    .uninit        = uninit,
    .query_formats = query_formats,
    .inputs        = dnn_processing_inputs,
    .outputs       = dnn_processing_outputs,
    .priv_class    = &dnn_processing_class,
};