vf_dnn_processing.c
/*
 * Copyright (c) 2019 Guo Yejun
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * implementing a generic image processing filter using deep learning networks.
 */
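
/*
 * Example usage (a sketch, not normative: "mymodel.model" and the tensor
 * names "dnn_in"/"dnn_out" are hypothetical and depend entirely on the
 * model file supplied by the user):
 *
 *     ffmpeg -i input.jpg -vf \
 *         dnn_processing=model=mymodel.model:input=dnn_in:output=dnn_out out.jpg
 *
 * The filter never resizes frames itself; if the model expects a fixed
 * input size, add an explicit scale filter in front of dnn_processing.
 */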

#include "libavformat/avio.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "libavutil/avassert.h"
#include "libavutil/imgutils.h"
#include "avfilter.h"
#include "dnn_interface.h"
#include "formats.h"
#include "internal.h"
#include "libswscale/swscale.h"

typedef struct DnnProcessingContext {
    const AVClass *class;

    char *model_filename;
    DNNBackendType backend_type;
    char *model_inputname;
    char *model_outputname;

    DNNModule *dnn_module;
    DNNModel *model;

    // input & output of the model at execution time
    DNNData input;
    DNNData output;

    struct SwsContext *sws_gray8_to_grayf32;
    struct SwsContext *sws_grayf32_to_gray8;
    struct SwsContext *sws_uv_scale;
    int sws_uv_height;
} DnnProcessingContext;

#define OFFSET(x) offsetof(DnnProcessingContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
static const AVOption dnn_processing_options[] = {
    { "dnn_backend", "DNN backend",              OFFSET(backend_type),     AV_OPT_TYPE_INT,    { .i64 = 0 },    0, 1, FLAGS, "backend" },
    { "native",      "native backend flag",      0,                        AV_OPT_TYPE_CONST,  { .i64 = 0 },    0, 0, FLAGS, "backend" },
#if (CONFIG_LIBTENSORFLOW == 1)
    { "tensorflow",  "tensorflow backend flag",  0,                        AV_OPT_TYPE_CONST,  { .i64 = 1 },    0, 0, FLAGS, "backend" },
#endif
    { "model",       "path to model file",       OFFSET(model_filename),   AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
    { "input",       "input name of the model",  OFFSET(model_inputname),  AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
    { "output",      "output name of the model", OFFSET(model_outputname), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
    { NULL }
};

AVFILTER_DEFINE_CLASS(dnn_processing);

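/* Validate the options, instantiate the requested DNN backend and load
 * the model file. */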
static av_cold int init(AVFilterContext *context)
{
    DnnProcessingContext *ctx = context->priv;

    if (!ctx->model_filename) {
        av_log(ctx, AV_LOG_ERROR, "model file for network is not specified\n");
        return AVERROR(EINVAL);
    }
    if (!ctx->model_inputname) {
        av_log(ctx, AV_LOG_ERROR, "input name of the model network is not specified\n");
        return AVERROR(EINVAL);
    }
    if (!ctx->model_outputname) {
        av_log(ctx, AV_LOG_ERROR, "output name of the model network is not specified\n");
        return AVERROR(EINVAL);
    }

    ctx->dnn_module = ff_get_dnn_module(ctx->backend_type);
    if (!ctx->dnn_module) {
        av_log(ctx, AV_LOG_ERROR, "could not create DNN module for requested backend\n");
        return AVERROR(ENOMEM);
    }
    if (!ctx->dnn_module->load_model) {
        av_log(ctx, AV_LOG_ERROR, "load_model for network is not specified\n");
        return AVERROR(EINVAL);
    }

    ctx->model = (ctx->dnn_module->load_model)(ctx->model_filename);
    if (!ctx->model) {
        av_log(ctx, AV_LOG_ERROR, "could not load DNN model\n");
        return AVERROR(EINVAL);
    }

    return 0;
}

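/* Declare the pixel formats this filter accepts; anything else is
 * converted beforehand by an auto-inserted scale filter. */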
static int query_formats(AVFilterContext *context)
{
    static const enum AVPixelFormat pix_fmts[] = {
        AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24,
        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAYF32,
        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
        AV_PIX_FMT_NONE
    };
    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
    return ff_set_common_formats(context, fmts_list);
}

#define LOG_FORMAT_CHANNEL_MISMATCH()                       \
    av_log(ctx, AV_LOG_ERROR,                               \
           "the frame's format %s does not match "          \
           "the model input channel %d\n",                  \
           av_get_pix_fmt_name(fmt),                        \
           model_input->channels);

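/*
 * Check that the inlink's geometry and pixel format are compatible with
 * the model's declared input:
 *   RGB24/BGR24           -> 3 channels, float32 or uint8
 *   GRAY8                 -> 1 channel,  uint8
 *   GRAYF32 / planar YUV  -> 1 channel,  float32 (only Y goes to the model)
 */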
static int check_modelinput_inlink(const DNNData *model_input, const AVFilterLink *inlink)
{
    AVFilterContext *ctx   = inlink->dst;
    enum AVPixelFormat fmt = inlink->format;

    // the design is to add an explicit scale filter before this filter
    if (model_input->height != -1 && model_input->height != inlink->h) {
        av_log(ctx, AV_LOG_ERROR, "the model requires frame height %d but got %d\n",
               model_input->height, inlink->h);
        return AVERROR(EIO);
    }
    if (model_input->width != -1 && model_input->width != inlink->w) {
        av_log(ctx, AV_LOG_ERROR, "the model requires frame width %d but got %d\n",
               model_input->width, inlink->w);
        return AVERROR(EIO);
    }

    switch (fmt) {
    case AV_PIX_FMT_RGB24:
    case AV_PIX_FMT_BGR24:
        if (model_input->channels != 3) {
            LOG_FORMAT_CHANNEL_MISMATCH();
            return AVERROR(EIO);
        }
        if (model_input->dt != DNN_FLOAT && model_input->dt != DNN_UINT8) {
            av_log(ctx, AV_LOG_ERROR, "only support dnn models with input data type as float32 and uint8.\n");
            return AVERROR(EIO);
        }
        return 0;
    case AV_PIX_FMT_GRAY8:
        if (model_input->channels != 1) {
            LOG_FORMAT_CHANNEL_MISMATCH();
            return AVERROR(EIO);
        }
        if (model_input->dt != DNN_UINT8) {
            av_log(ctx, AV_LOG_ERROR, "only support dnn models with input data type uint8.\n");
            return AVERROR(EIO);
        }
        return 0;
    case AV_PIX_FMT_GRAYF32:
    case AV_PIX_FMT_YUV420P:
    case AV_PIX_FMT_YUV422P:
    case AV_PIX_FMT_YUV444P:
    case AV_PIX_FMT_YUV410P:
    case AV_PIX_FMT_YUV411P:
        if (model_input->channels != 1) {
            LOG_FORMAT_CHANNEL_MISMATCH();
            return AVERROR(EIO);
        }
        if (model_input->dt != DNN_FLOAT) {
            av_log(ctx, AV_LOG_ERROR, "only support dnn models with input data type float32.\n");
            return AVERROR(EIO);
        }
        return 0;
    default:
        av_log(ctx, AV_LOG_ERROR, "%s not supported.\n", av_get_pix_fmt_name(fmt));
        return AVERROR(EIO);
    }

    return 0;
}

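/* Query the model for its input spec, verify it against the inlink, then
 * bind the model's input and output tensors for execution. */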
static int config_input(AVFilterLink *inlink)
{
    AVFilterContext *context  = inlink->dst;
    DnnProcessingContext *ctx = context->priv;
    DNNReturnType result;
    DNNData model_input;
    int check;

    result = ctx->model->get_input(ctx->model->model, &model_input, ctx->model_inputname);
    if (result != DNN_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "could not get input from the model\n");
        return AVERROR(EIO);
    }

    check = check_modelinput_inlink(&model_input, inlink);
    if (check != 0) {
        return check;
    }

    ctx->input.width    = inlink->w;
    ctx->input.height   = inlink->h;
    ctx->input.channels = model_input.channels;
    ctx->input.dt       = model_input.dt;

    result = (ctx->model->set_input_output)(ctx->model->model,
                                            &ctx->input, ctx->model_inputname,
                                            (const char **)&ctx->model_outputname, 1);
    if (result != DNN_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "could not set input and output for the model\n");
        return AVERROR(EIO);
    }

    return 0;
}

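/*
 * Set up the swscale contexts used for data type conversion. For packed
 * RGB with a float model, each W*3-byte row is viewed as a gray8 line of
 * width W*3 and converted to grayf32 (and back), which turns uint8 into
 * float without touching the channel layout. For planar YUV only the Y
 * plane goes through the model; an extra context rescales U/V when the
 * model changes the frame size.
 */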
static int prepare_sws_context(AVFilterLink *outlink)
{
    AVFilterContext *context  = outlink->src;
    DnnProcessingContext *ctx = context->priv;
    AVFilterLink *inlink = context->inputs[0];
    enum AVPixelFormat fmt = inlink->format;
    DNNDataType input_dt  = ctx->input.dt;
    DNNDataType output_dt = ctx->output.dt;

    switch (fmt) {
    case AV_PIX_FMT_RGB24:
    case AV_PIX_FMT_BGR24:
        if (input_dt == DNN_FLOAT) {
            ctx->sws_gray8_to_grayf32 = sws_getContext(inlink->w * 3,
                                                       inlink->h,
                                                       AV_PIX_FMT_GRAY8,
                                                       inlink->w * 3,
                                                       inlink->h,
                                                       AV_PIX_FMT_GRAYF32,
                                                       0, NULL, NULL, NULL);
        }
        if (output_dt == DNN_FLOAT) {
            ctx->sws_grayf32_to_gray8 = sws_getContext(outlink->w * 3,
                                                       outlink->h,
                                                       AV_PIX_FMT_GRAYF32,
                                                       outlink->w * 3,
                                                       outlink->h,
                                                       AV_PIX_FMT_GRAY8,
                                                       0, NULL, NULL, NULL);
        }
        return 0;
    case AV_PIX_FMT_YUV420P:
    case AV_PIX_FMT_YUV422P:
    case AV_PIX_FMT_YUV444P:
    case AV_PIX_FMT_YUV410P:
    case AV_PIX_FMT_YUV411P:
        av_assert0(input_dt == DNN_FLOAT);
        av_assert0(output_dt == DNN_FLOAT);
        ctx->sws_gray8_to_grayf32 = sws_getContext(inlink->w,
                                                   inlink->h,
                                                   AV_PIX_FMT_GRAY8,
                                                   inlink->w,
                                                   inlink->h,
                                                   AV_PIX_FMT_GRAYF32,
                                                   0, NULL, NULL, NULL);
        ctx->sws_grayf32_to_gray8 = sws_getContext(outlink->w,
                                                   outlink->h,
                                                   AV_PIX_FMT_GRAYF32,
                                                   outlink->w,
                                                   outlink->h,
                                                   AV_PIX_FMT_GRAY8,
                                                   0, NULL, NULL, NULL);

        if (inlink->w != outlink->w || inlink->h != outlink->h) {
            const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
            int sws_src_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
            int sws_src_w = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
            int sws_dst_h = AV_CEIL_RSHIFT(outlink->h, desc->log2_chroma_h);
            int sws_dst_w = AV_CEIL_RSHIFT(outlink->w, desc->log2_chroma_w);
            ctx->sws_uv_scale = sws_getContext(sws_src_w, sws_src_h, AV_PIX_FMT_GRAY8,
                                               sws_dst_w, sws_dst_h, AV_PIX_FMT_GRAY8,
                                               SWS_BICUBIC, NULL, NULL, NULL);
            ctx->sws_uv_height = sws_src_h;
        }
        return 0;
    default:
        //do nothing
        break;
    }

    return 0;
}

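/* Run the model once before streaming starts, so the output link can be
 * configured with whatever frame size the model actually produces. */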
static int config_output(AVFilterLink *outlink)
{
    AVFilterContext *context  = outlink->src;
    DnnProcessingContext *ctx = context->priv;
    DNNReturnType result;

    // do a trial run, in case the dnn model resizes the frame
    result = (ctx->dnn_module->execute_model)(ctx->model, &ctx->output, 1);
    if (result != DNN_SUCCESS){
        av_log(ctx, AV_LOG_ERROR, "failed to execute model\n");
        return AVERROR(EIO);
    }

    outlink->w = ctx->output.width;
    outlink->h = ctx->output.height;

    prepare_sws_context(outlink);

    return 0;
}

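/* Copy one input frame into the model's input buffer, converting uint8
 * samples to float via swscale where the model expects float. */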
static int copy_from_frame_to_dnn(DnnProcessingContext *ctx, const AVFrame *frame)
{
    int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);
    DNNData *dnn_input = &ctx->input;

    switch (frame->format) {
    case AV_PIX_FMT_RGB24:
    case AV_PIX_FMT_BGR24:
        if (dnn_input->dt == DNN_FLOAT) {
            sws_scale(ctx->sws_gray8_to_grayf32, (const uint8_t **)frame->data, frame->linesize,
                      0, frame->height, (uint8_t * const*)(&dnn_input->data),
                      (const int [4]){frame->linesize[0] * sizeof(float), 0, 0, 0});
        } else {
            av_assert0(dnn_input->dt == DNN_UINT8);
            av_image_copy_plane(dnn_input->data, bytewidth,
                                frame->data[0], frame->linesize[0],
                                bytewidth, frame->height);
        }
        return 0;
    case AV_PIX_FMT_GRAY8:
    case AV_PIX_FMT_GRAYF32:
        av_image_copy_plane(dnn_input->data, bytewidth,
                            frame->data[0], frame->linesize[0],
                            bytewidth, frame->height);
        return 0;
    case AV_PIX_FMT_YUV420P:
    case AV_PIX_FMT_YUV422P:
    case AV_PIX_FMT_YUV444P:
    case AV_PIX_FMT_YUV410P:
    case AV_PIX_FMT_YUV411P:
        sws_scale(ctx->sws_gray8_to_grayf32, (const uint8_t **)frame->data, frame->linesize,
                  0, frame->height, (uint8_t * const*)(&dnn_input->data),
                  (const int [4]){frame->width * sizeof(float), 0, 0, 0});
        return 0;
    default:
        return AVERROR(EIO);
    }

    return 0;
}

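/* Copy the model's output buffer back into the output frame, converting
 * float samples to uint8 via swscale where the frame expects uint8. */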
static int copy_from_dnn_to_frame(DnnProcessingContext *ctx, AVFrame *frame)
{
    int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);
    DNNData *dnn_output = &ctx->output;

    switch (frame->format) {
    case AV_PIX_FMT_RGB24:
    case AV_PIX_FMT_BGR24:
        if (dnn_output->dt == DNN_FLOAT) {
            sws_scale(ctx->sws_grayf32_to_gray8, (const uint8_t *[4]){(const uint8_t *)dnn_output->data, 0, 0, 0},
                      (const int[4]){frame->linesize[0] * sizeof(float), 0, 0, 0},
                      0, frame->height, (uint8_t * const*)frame->data, frame->linesize);
        } else {
            av_assert0(dnn_output->dt == DNN_UINT8);
            av_image_copy_plane(frame->data[0], frame->linesize[0],
                                dnn_output->data, bytewidth,
                                bytewidth, frame->height);
        }
        return 0;
    case AV_PIX_FMT_GRAY8:
        // it is possible that data type of dnn output is float32,
        // need to add support for such case when needed.
        av_assert0(dnn_output->dt == DNN_UINT8);
        av_image_copy_plane(frame->data[0], frame->linesize[0],
                            dnn_output->data, bytewidth,
                            bytewidth, frame->height);
        return 0;
    case AV_PIX_FMT_GRAYF32:
        av_assert0(dnn_output->dt == DNN_FLOAT);
        av_image_copy_plane(frame->data[0], frame->linesize[0],
                            dnn_output->data, bytewidth,
                            bytewidth, frame->height);
        return 0;
    case AV_PIX_FMT_YUV420P:
    case AV_PIX_FMT_YUV422P:
    case AV_PIX_FMT_YUV444P:
    case AV_PIX_FMT_YUV410P:
    case AV_PIX_FMT_YUV411P:
        sws_scale(ctx->sws_grayf32_to_gray8, (const uint8_t *[4]){(const uint8_t *)dnn_output->data, 0, 0, 0},
                  (const int[4]){frame->width * sizeof(float), 0, 0, 0},
                  0, frame->height, (uint8_t * const*)frame->data, frame->linesize);
        return 0;
    default:
        return AVERROR(EIO);
    }

    return 0;
}

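/* True for 3-component non-RGB formats, i.e. the planar YUV formats. */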
static av_always_inline int isPlanarYUV(enum AVPixelFormat pix_fmt)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
    av_assert0(desc);
    return !(desc->flags & AV_PIX_FMT_FLAG_RGB) && desc->nb_components == 3;
}

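/* The model only processes the Y plane, so U/V are carried over from the
 * input frame: copied verbatim when the size is unchanged, or rescaled
 * with the bicubic sws_uv_scale context when the model resized the frame. */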
static int copy_uv_planes(DnnProcessingContext *ctx, AVFrame *out, const AVFrame *in)
{
    const AVPixFmtDescriptor *desc;
    int uv_height;

    if (!ctx->sws_uv_scale) {
        av_assert0(in->height == out->height && in->width == out->width);
        desc = av_pix_fmt_desc_get(in->format);
        uv_height = AV_CEIL_RSHIFT(in->height, desc->log2_chroma_h);
        for (int i = 1; i < 3; ++i) {
            int bytewidth = av_image_get_linesize(in->format, in->width, i);
            av_image_copy_plane(out->data[i], out->linesize[i],
                                in->data[i], in->linesize[i],
                                bytewidth, uv_height);
        }
    } else {
        sws_scale(ctx->sws_uv_scale, (const uint8_t **)(in->data + 1), in->linesize + 1,
                  0, ctx->sws_uv_height, out->data + 1, out->linesize + 1);
        sws_scale(ctx->sws_uv_scale, (const uint8_t **)(in->data + 2), in->linesize + 2,
                  0, ctx->sws_uv_height, out->data + 2, out->linesize + 2);
    }

    return 0;
}

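/* Per-frame entry point: feed the frame to the model, run inference, then
 * build the output frame from the result (plus carried-over U/V planes). */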
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *context  = inlink->dst;
    AVFilterLink *outlink = context->outputs[0];
    DnnProcessingContext *ctx = context->priv;
    DNNReturnType dnn_result;
    AVFrame *out;

    copy_from_frame_to_dnn(ctx, in);

    dnn_result = (ctx->dnn_module->execute_model)(ctx->model, &ctx->output, 1);
    if (dnn_result != DNN_SUCCESS){
        av_log(ctx, AV_LOG_ERROR, "failed to execute model\n");
        av_frame_free(&in);
        return AVERROR(EIO);
    }

    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
    if (!out) {
        av_frame_free(&in);
        return AVERROR(ENOMEM);
    }

    av_frame_copy_props(out, in);
    copy_from_dnn_to_frame(ctx, out);

    if (isPlanarYUV(in->format))
        copy_uv_planes(ctx, out, in);

    av_frame_free(&in);
    return ff_filter_frame(outlink, out);
}

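/* Release the swscale contexts, the loaded model and the DNN module. */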
static av_cold void uninit(AVFilterContext *ctx)
{
    DnnProcessingContext *context = ctx->priv;

    sws_freeContext(context->sws_gray8_to_grayf32);
    sws_freeContext(context->sws_grayf32_to_gray8);
    sws_freeContext(context->sws_uv_scale);

    if (context->dnn_module)
        (context->dnn_module->free_model)(&context->model);

    av_freep(&context->dnn_module);
}

static const AVFilterPad dnn_processing_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = config_input,
        .filter_frame = filter_frame,
    },
    { NULL }
};

static const AVFilterPad dnn_processing_outputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = config_output,
    },
    { NULL }
};

AVFilter ff_vf_dnn_processing = {
    .name          = "dnn_processing",
    .description   = NULL_IF_CONFIG_SMALL("Apply DNN processing filter to the input."),
    .priv_size     = sizeof(DnnProcessingContext),
    .init          = init,
    .uninit        = uninit,
    .query_formats = query_formats,
    .inputs        = dnn_processing_inputs,
    .outputs       = dnn_processing_outputs,
    .priv_class    = &dnn_processing_class,
};