vf_nnedi.c
1 /*
2  * Copyright (C) 2010-2011 Kevin Stone
3  * Copyright (C) 2016 Paul B Mahol
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License along
18  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21 
22 #include <float.h>
23 
24 #include "libavutil/common.h"
25 #include "libavutil/float_dsp.h"
26 #include "libavutil/imgutils.h"
27 #include "libavutil/opt.h"
28 #include "libavutil/pixdesc.h"
29 #include "avfilter.h"
30 #include "formats.h"
31 #include "internal.h"
32 #include "video.h"
33 
34 typedef struct FrameData {
35  uint8_t *paddedp[3];
36  int padded_stride[3];
37  int padded_width[3];
38  int padded_height[3];
39 
40  uint8_t *dstp[3];
41  int dst_stride[3];
42 
43  int field[3];
44 
45  int32_t *lcount[3];
46  float *input;
47  float *temp;
48 } FrameData;
49 
50 typedef struct NNEDIContext {
51  const AVClass *class;
52 
53  char *weights_file;
54 
55  AVFrame *src;
56  AVFrame *second;
57  AVFrame *dst;
58  int eof;
59  int64_t cur_pts;
60 
61  AVFloatDSPContext *fdsp;
62  int nb_planes;
63  int linesize[4];
64  int planeheight[4];
65 
66  float *weights0;
67  float *weights1[2];
68  int asize;
69  int nns;
70  int xdia;
71  int ydia;
72 
73  // Parameters
74  int deint;
75  int field;
76  int process_plane;
77  int nsize;
78  int nnsparam;
79  int qual;
80  int etype;
81  int pscrn;
82  int fapprox;
83 
84  int max_value;
85 
86  void (*copy_pad)(const AVFrame *, FrameData *, struct NNEDIContext *, int);
87  void (*evalfunc_0)(struct NNEDIContext *, FrameData *);
88  void (*evalfunc_1)(struct NNEDIContext *, FrameData *);
89 
90  // Functions used in evalfunc_0
91  void (*readpixels)(const uint8_t *, const int, float *);
92  void (*compute_network0)(struct NNEDIContext *s, const float *, const float *, uint8_t *);
93  int32_t (*process_line0)(const uint8_t *, int, uint8_t *, const uint8_t *, const int, const int, const int);
94 
95  // Functions used in evalfunc_1
96  void (*extract)(const uint8_t *, const int, const int, const int, float *, float *);
97  void (*dot_prod)(struct NNEDIContext *, const float *, const float *, float *, const int, const int, const float *);
98  void (*expfunc)(float *, const int);
99  void (*wae5)(const float *, const int, float *);
100 
101  FrameData frame_data;
102 } NNEDIContext;
103 
104 #define OFFSET(x) offsetof(NNEDIContext, x)
105 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
106 
107 static const AVOption nnedi_options[] = {
108  {"weights", "set weights file", OFFSET(weights_file), AV_OPT_TYPE_STRING, {.str="nnedi3_weights.bin"}, 0, 0, FLAGS },
109  {"deint", "set which frames to deinterlace", OFFSET(deint), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS, "deint" },
110  {"all", "deinterlace all frames", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "deint" },
111  {"interlaced", "only deinterlace frames marked as interlaced", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "deint" },
112  {"field", "set mode of operation", OFFSET(field), AV_OPT_TYPE_INT, {.i64=-1}, -2, 3, FLAGS, "field" },
113  {"af", "use frame flags, both fields", 0, AV_OPT_TYPE_CONST, {.i64=-2}, 0, 0, FLAGS, "field" },
114  {"a", "use frame flags, single field", 0, AV_OPT_TYPE_CONST, {.i64=-1}, 0, 0, FLAGS, "field" },
115  {"t", "use top field only", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "field" },
116  {"b", "use bottom field only", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "field" },
117  {"tf", "use both fields, top first", 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, FLAGS, "field" },
118  {"bf", "use both fields, bottom first", 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, FLAGS, "field" },
119  {"planes", "set which planes to process", OFFSET(process_plane), AV_OPT_TYPE_INT, {.i64=7}, 0, 7, FLAGS },
120  {"nsize", "set size of local neighborhood around each pixel, used by the predictor neural network", OFFSET(nsize), AV_OPT_TYPE_INT, {.i64=6}, 0, 6, FLAGS, "nsize" },
121  {"s8x6", NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "nsize" },
122  {"s16x6", NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "nsize" },
123  {"s32x6", NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, FLAGS, "nsize" },
124  {"s48x6", NULL, 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, FLAGS, "nsize" },
125  {"s8x4", NULL, 0, AV_OPT_TYPE_CONST, {.i64=4}, 0, 0, FLAGS, "nsize" },
126  {"s16x4", NULL, 0, AV_OPT_TYPE_CONST, {.i64=5}, 0, 0, FLAGS, "nsize" },
127  {"s32x4", NULL, 0, AV_OPT_TYPE_CONST, {.i64=6}, 0, 0, FLAGS, "nsize" },
128  {"nns", "set number of neurons in predictor neural network", OFFSET(nnsparam), AV_OPT_TYPE_INT, {.i64=1}, 0, 4, FLAGS, "nns" },
129  {"n16", NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "nns" },
130  {"n32", NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "nns" },
131  {"n64", NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, FLAGS, "nns" },
132  {"n128", NULL, 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, FLAGS, "nns" },
133  {"n256", NULL, 0, AV_OPT_TYPE_CONST, {.i64=4}, 0, 0, FLAGS, "nns" },
134  {"qual", "set quality", OFFSET(qual), AV_OPT_TYPE_INT, {.i64=1}, 1, 2, FLAGS, "qual" },
135  {"fast", NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "qual" },
136  {"slow", NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, FLAGS, "qual" },
137  {"etype", "set which set of weights to use in the predictor", OFFSET(etype), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS, "etype" },
138  {"a", "weights trained to minimize absolute error", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "etype" },
139  {"s", "weights trained to minimize squared error", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "etype" },
140  {"pscrn", "set prescreening", OFFSET(pscrn), AV_OPT_TYPE_INT, {.i64=2}, 0, 2, FLAGS, "pscrn" },
141  {"none", NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "pscrn" },
142  {"original", NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "pscrn" },
143  {"new", NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, FLAGS, "pscrn" },
144  {"fapprox", NULL, OFFSET(fapprox), AV_OPT_TYPE_INT, {.i64=0}, 0, 3, FLAGS },
145  { NULL }
146 };
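These options map directly onto a filtergraph string; a hedged example built only from the option and constant names listed above would be nnedi=weights=nnedi3_weights.bin:deint=interlaced:field=af:nns=n64:qual=slow, which deinterlaces only frames flagged as interlaced, outputs both fields, and runs the 64-neuron predictor twice (qual=slow).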
147 
148 AVFILTER_DEFINE_CLASS(nnedi);
149 
150 static int config_input(AVFilterLink *inlink)
151 {
152  AVFilterContext *ctx = inlink->dst;
153  NNEDIContext *s = ctx->priv;
154  const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
155  int ret;
156 
157  s->nb_planes = av_pix_fmt_count_planes(inlink->format);
158  if ((ret = av_image_fill_linesizes(s->linesize, inlink->format, inlink->w)) < 0)
159  return ret;
160 
161  s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
162  s->planeheight[0] = s->planeheight[3] = inlink->h;
163 
164  return 0;
165 }
166 
167 static int config_output(AVFilterLink *outlink)
168 {
169  AVFilterContext *ctx = outlink->src;
170  NNEDIContext *s = ctx->priv;
171 
172  outlink->time_base.num = ctx->inputs[0]->time_base.num;
173  outlink->time_base.den = ctx->inputs[0]->time_base.den * 2;
174  outlink->w = ctx->inputs[0]->w;
175  outlink->h = ctx->inputs[0]->h;
176 
177  if (s->field > 1 || s->field == -2)
178  outlink->frame_rate = av_mul_q(ctx->inputs[0]->frame_rate,
179  (AVRational){2, 1});
180 
181  return 0;
182 }
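config_output() halves the output time base (the denominator is doubled) and, in the double-rate modes (field set to tf, bf, or af/-2), also doubles the advertised frame rate: a 25 fps input with a 1/25 time base therefore gets a 1/50 output time base and, in those modes, a 50/1 frame rate; the corresponding PTS doubling happens later in filter_frame() and get_frame().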
183 
184 static int query_formats(AVFilterContext *ctx)
185 {
186  static const enum AVPixelFormat pix_fmts[] = {
187  AV_PIX_FMT_GRAY8,
188  AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
189  AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
190  AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,
191  AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P,
192  AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P,
193  AV_PIX_FMT_YUVJ411P,
194  AV_PIX_FMT_GBRP,
195  AV_PIX_FMT_NONE
196  };
197 
198  AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
199  if (!fmts_list)
200  return AVERROR(ENOMEM);
201  return ff_set_common_formats(ctx, fmts_list);
202 }
203 
204 static void copy_pad(const AVFrame *src, FrameData *frame_data, NNEDIContext *s, int fn)
205 {
206  const int off = 1 - fn;
207  int plane, y, x;
208 
209  for (plane = 0; plane < s->nb_planes; plane++) {
210  const uint8_t *srcp = (const uint8_t *)src->data[plane];
211  uint8_t *dstp = (uint8_t *)frame_data->paddedp[plane];
212 
213  const int src_stride = src->linesize[plane];
214  const int dst_stride = frame_data->padded_stride[plane];
215 
216  const int src_height = s->planeheight[plane];
217  const int dst_height = frame_data->padded_height[plane];
218 
219  const int src_width = s->linesize[plane];
220  const int dst_width = frame_data->padded_width[plane];
221 
222  int c = 4;
223 
224  if (!(s->process_plane & (1 << plane)))
225  continue;
226 
227  // Copy.
228  for (y = off; y < src_height; y += 2)
229  memcpy(dstp + 32 + (6 + y) * dst_stride,
230  srcp + y * src_stride,
231  src_width * sizeof(uint8_t));
232 
233  // And pad.
234  dstp += (6 + off) * dst_stride;
235  for (y = 6 + off; y < dst_height - 6; y += 2) {
236  int c = 2;
237 
238  for (x = 0; x < 32; x++)
239  dstp[x] = dstp[64 - x];
240 
241  for (x = dst_width - 32; x < dst_width; x++, c += 2)
242  dstp[x] = dstp[x - c];
243 
244  dstp += dst_stride * 2;
245  }
246 
247  dstp = (uint8_t *)frame_data->paddedp[plane];
248  for (y = off; y < 6; y += 2)
249  memcpy(dstp + y * dst_stride,
250  dstp + (12 + 2 * off - y) * dst_stride,
251  dst_width * sizeof(uint8_t));
252 
253  for (y = dst_height - 6 + off; y < dst_height; y += 2, c += 4)
254  memcpy(dstp + y * dst_stride,
255  dstp + (y - c) * dst_stride,
256  dst_width * sizeof(uint8_t));
257  }
258 }
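copy_pad() builds, for every processed plane, a working copy that is 64 columns wider (32 on each side) and 12 rows taller (6 above, 6 below) than the source, copying only the rows of the field being used (y starts at 1 - fn and steps by 2) and filling the borders by mirroring; for example a 720x480 luma plane becomes a 784x492 padded plane, matching the padded_width/padded_height values set up in get_frame().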
259 
260 static void elliott(float *data, const int n)
261 {
262  int i;
263 
264  for (i = 0; i < n; i++)
265  data[i] = data[i] / (1.0f + FFABS(data[i]));
266 }
267 
268 static void dot_prod(NNEDIContext *s, const float *data, const float *weights, float *vals, const int n, const int len, const float *scale)
269 {
270  int i;
271 
272  for (i = 0; i < n; i++) {
273  float sum;
274 
275  sum = s->fdsp->scalarproduct_float(data, &weights[i * len], len);
276 
277  vals[i] = sum * scale[0] + weights[n * len + i];
278  }
279 }
280 
281 static void dot_prods(NNEDIContext *s, const float *dataf, const float *weightsf, float *vals, const int n, const int len, const float *scale)
282 {
283  const int16_t *data = (int16_t *)dataf;
284  const int16_t *weights = (int16_t *)weightsf;
285  const float *wf = (float *)&weights[n * len];
286  int i, j;
287 
288  for (i = 0; i < n; i++) {
289  int sum = 0, off = ((i >> 2) << 3) + (i & 3);
290  for (j = 0; j < len; j++)
291  sum += data[j] * weights[i * len + j];
292 
293  vals[i] = sum * wf[off] * scale[0] + wf[off + 4];
294  }
295 }
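dot_prods() is the int16 counterpart of dot_prod(): the quantized weights are followed by per-neuron scale and bias floats stored interleaved in groups of four, so off = ((i >> 2) << 3) + (i & 3) selects the scale at wf[off] and the bias at wf[off + 4] (scales of neurons 0-3, then biases of neurons 0-3, then scales of neurons 4-7, and so on), which is exactly the order init() writes with wf[(j >> 2) * 8 + (j & 3)] and wf[(j >> 2) * 8 + (j & 3) + 4].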
296 
297 static void compute_network0(NNEDIContext *s, const float *input, const float *weights, uint8_t *d)
298 {
299  float t, temp[12], scale = 1.0f;
300 
301  dot_prod(s, input, weights, temp, 4, 48, &scale);
302  t = temp[0];
303  elliott(temp, 4);
304  temp[0] = t;
305  dot_prod(s, temp, weights + 4 * 49, temp + 4, 4, 4, &scale);
306  elliott(temp + 4, 4);
307  dot_prod(s, temp, weights + 4 * 49 + 4 * 5, temp + 8, 4, 8, &scale);
308  if (FFMAX(temp[10], temp[11]) <= FFMAX(temp[8], temp[9]))
309  d[0] = 1;
310  else
311  d[0] = 0;
312 }
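compute_network0() is the original prescreener, a small 48-4-4-4 network: 48 inputs (a 12x4 pixel window), a first layer of 4 neurons whose first output is kept linear while the others pass through elliott(), a 4-neuron hidden layer, and 4 outputs; the offsets 4 * 49 and 4 * 49 + 4 * 5 skip the 4x48+4 and 4x4+4 weight-plus-bias blocks of the preceding layers. d[0] = 1 means the cheap cubic interpolation in process_line0() is judged sufficient for this pixel, while d[0] = 0 leaves it to the predictor network in evalfunc_1().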
313 
314 static void compute_network0_i16(NNEDIContext *s, const float *inputf, const float *weightsf, uint8_t *d)
315 {
316  const float *wf = weightsf + 2 * 48;
317  float t, temp[12], scale = 1.0f;
318 
319  dot_prods(s, inputf, weightsf, temp, 4, 48, &scale);
320  t = temp[0];
321  elliott(temp, 4);
322  temp[0] = t;
323  dot_prod(s, temp, wf + 8, temp + 4, 4, 4, &scale);
324  elliott(temp + 4, 4);
325  dot_prod(s, temp, wf + 8 + 4 * 5, temp + 8, 4, 8, &scale);
326  if (FFMAX(temp[10], temp[11]) <= FFMAX(temp[8], temp[9]))
327  d[0] = 1;
328  else
329  d[0] = 0;
330 }
331 
332 static void pixel2float48(const uint8_t *t8, const int pitch, float *p)
333 {
334  const uint8_t *t = (const uint8_t *)t8;
335  int y, x;
336 
337  for (y = 0; y < 4; y++)
338  for (x = 0; x < 12; x++)
339  p[y * 12 + x] = t[y * pitch * 2 + x];
340 }
341 
342 static void byte2word48(const uint8_t *t, const int pitch, float *pf)
343 {
344  int16_t *p = (int16_t *)pf;
345  int y, x;
346 
347  for (y = 0; y < 4; y++)
348  for (x = 0; x < 12; x++)
349  p[y * 12 + x] = t[y * pitch * 2 + x];
350 }
351 
352 static int32_t process_line0(const uint8_t *tempu, int width, uint8_t *dstp8, const uint8_t *src3p8, const int src_pitch, const int max_value, const int chroma)
353 {
354  uint8_t *dstp = (uint8_t *)dstp8;
355  const uint8_t *src3p = (const uint8_t *)src3p8;
356  int minimum = 0;
357  int maximum = max_value - 1; // Technically the -1 is only needed for 8 and 16 bit input.
358  int count = 0, x;
359  for (x = 0; x < width; x++) {
360  if (tempu[x]) {
361  int tmp = 19 * (src3p[x + src_pitch * 2] + src3p[x + src_pitch * 4]) - 3 * (src3p[x] + src3p[x + src_pitch * 6]);
362  tmp /= 32;
363  dstp[x] = FFMAX(FFMIN(tmp, maximum), minimum);
364  } else {
365  dstp[x] = 255;
366  count++;
367  }
368  }
369  return count;
370 }
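When the prescreener accepts a pixel, process_line0() reconstructs it with a 4-tap vertical cubic kernel of (-3, 19, 19, -3)/32 applied to the four neighbouring lines of the opposite field; pixels it rejects are set to the 255 marker so evalfunc_1() knows to run the predictor there. A minimal standalone sketch of the same arithmetic (the helper name cubic4 is hypothetical, not part of the filter):

#include <stdint.h>

/* 4-tap cubic interpolation, identical to the tmp computation above:
 * out = clip((19 * (b + c) - 3 * (a + d)) / 32, 0, maximum)         */
static inline uint8_t cubic4(int a, int b, int c, int d, int maximum)
{
    int tmp = (19 * (b + c) - 3 * (a + d)) / 32;
    if (tmp < 0)
        tmp = 0;
    if (tmp > maximum)
        tmp = maximum;
    return (uint8_t)tmp;
}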
371 
372 // new prescreener functions
373 static void byte2word64(const uint8_t *t, const int pitch, float *p)
374 {
375  int16_t *ps = (int16_t *)p;
376  int y, x;
377 
378  for (y = 0; y < 4; y++)
379  for (x = 0; x < 16; x++)
380  ps[y * 16 + x] = t[y * pitch * 2 + x];
381 }
382 
383 static void compute_network0new(NNEDIContext *s, const float *datai, const float *weights, uint8_t *d)
384 {
385  int16_t *data = (int16_t *)datai;
386  int16_t *ws = (int16_t *)weights;
387  float *wf = (float *)&ws[4 * 64];
388  float vals[8];
389  int mask, i, j;
390 
391  for (i = 0; i < 4; i++) {
392  int sum = 0;
393  float t;
394 
395  for (j = 0; j < 64; j++)
396  sum += data[j] * ws[(i << 3) + ((j >> 3) << 5) + (j & 7)];
397  t = sum * wf[i] + wf[4 + i];
398  vals[i] = t / (1.0f + FFABS(t));
399  }
400 
401  for (i = 0; i < 4; i++) {
402  float sum = 0.0f;
403 
404  for (j = 0; j < 4; j++)
405  sum += vals[j] * wf[8 + i + (j << 2)];
406  vals[4 + i] = sum + wf[8 + 16 + i];
407  }
408 
409  mask = 0;
410  for (i = 0; i < 4; i++) {
411  if (vals[4 + i] > 0.0f)
412  mask |= (0x1 << (i << 3));
413  }
414 
415  ((int *)d)[0] = mask;
416 }
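The new prescreener evaluates four output pixels per call (hence the x += 4 stride in evalfunc_0()): bit i*8 of the 32-bit mask is set when vals[4 + i] > 0, i.e. one flag byte per pixel, so the per-pixel tempu[x] test in process_line0() keeps working unchanged; the scrambled weight index (i << 3) + ((j >> 3) << 5) + (j & 7) mirrors the offt[] permutation init() applies when quantizing these weights.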
417 
418 static void evalfunc_0(NNEDIContext *s, FrameData *frame_data)
419 {
420  float *input = frame_data->input;
421  const float *weights0 = s->weights0;
422  float *temp = frame_data->temp;
423  uint8_t *tempu = (uint8_t *)temp;
424  int plane, x, y;
425 
426  // And now the actual work.
427  for (plane = 0; plane < s->nb_planes; plane++) {
428  const uint8_t *srcp = (const uint8_t *)frame_data->paddedp[plane];
429  const int src_stride = frame_data->padded_stride[plane] / sizeof(uint8_t);
430 
431  const int width = frame_data->padded_width[plane];
432  const int height = frame_data->padded_height[plane];
433 
434  uint8_t *dstp = (uint8_t *)frame_data->dstp[plane];
435  const int dst_stride = frame_data->dst_stride[plane] / sizeof(uint8_t);
436  const uint8_t *src3p;
437  int ystart, ystop;
438  int32_t *lcount;
439 
440  if (!(s->process_plane & (1 << plane)))
441  continue;
442 
443  for (y = 1 - frame_data->field[plane]; y < height - 12; y += 2) {
444  memcpy(dstp + y * dst_stride,
445  srcp + 32 + (6 + y) * src_stride,
446  (width - 64) * sizeof(uint8_t));
447 
448  }
449 
450  ystart = 6 + frame_data->field[plane];
451  ystop = height - 6;
452  srcp += ystart * src_stride;
453  dstp += (ystart - 6) * dst_stride - 32;
454  src3p = srcp - src_stride * 3;
455  lcount = frame_data->lcount[plane] - 6;
456 
457  if (s->pscrn == 1) { // original
458  for (y = ystart; y < ystop; y += 2) {
459  for (x = 32; x < width - 32; x++) {
460  s->readpixels((const uint8_t *)(src3p + x - 5), src_stride, input);
461  s->compute_network0(s, input, weights0, tempu+x);
462  }
463  lcount[y] += s->process_line0(tempu + 32, width - 64, (uint8_t *)(dstp + 32), (const uint8_t *)(src3p + 32), src_stride, s->max_value, plane);
464  src3p += src_stride * 2;
465  dstp += dst_stride * 2;
466  }
467  } else if (s->pscrn > 1) { // new
468  for (y = ystart; y < ystop; y += 2) {
469  for (x = 32; x < width - 32; x += 4) {
470  s->readpixels((const uint8_t *)(src3p + x - 6), src_stride, input);
471  s->compute_network0(s, input, weights0, tempu + x);
472  }
473  lcount[y] += s->process_line0(tempu + 32, width - 64, (uint8_t *)(dstp + 32), (const uint8_t *)(src3p + 32), src_stride, s->max_value, plane);
474  src3p += src_stride * 2;
475  dstp += dst_stride * 2;
476  }
477  } else { // no prescreening
478  for (y = ystart; y < ystop; y += 2) {
479  memset(dstp + 32, 255, (width - 64) * sizeof(uint8_t));
480  lcount[y] += width - 64;
481  dstp += dst_stride * 2;
482  }
483  }
484  }
485 }
486 
487 static void extract_m8(const uint8_t *srcp8, const int stride, const int xdia, const int ydia, float *mstd, float *input)
488 {
489  // uint8_t or uint16_t or float
490  const uint8_t *srcp = (const uint8_t *)srcp8;
491  float scale;
492  double tmp;
493 
494  // int32_t or int64_t or double
495  int64_t sum = 0, sumsq = 0;
496  int y, x;
497 
498  for (y = 0; y < ydia; y++) {
499  const uint8_t *srcpT = srcp + y * stride * 2;
500 
501  for (x = 0; x < xdia; x++) {
502  sum += srcpT[x];
503  sumsq += (uint32_t)srcpT[x] * (uint32_t)srcpT[x];
504  input[x] = srcpT[x];
505  }
506  input += xdia;
507  }
508  scale = 1.0f / (xdia * ydia);
509  mstd[0] = sum * scale;
510  tmp = (double)sumsq * scale - (double)mstd[0] * mstd[0];
511  mstd[3] = 0.0f;
512  if (tmp <= FLT_EPSILON)
513  mstd[1] = mstd[2] = 0.0f;
514  else {
515  mstd[1] = sqrt(tmp);
516  mstd[2] = 1.0f / mstd[1];
517  }
518 }
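extract_m8() fills mstd[] with the local statistics the predictor needs: mstd[0] is the mean of the xdia x ydia window, mstd[1] the standard deviation computed as sqrt(E[x^2] - mean^2), mstd[2] the reciprocal 1/stddev used to normalize the dot products, and mstd[3] is cleared so weighted_avg_elliott_mul5_m16() can accumulate the prediction into it; when the variance falls below FLT_EPSILON the block is treated as flat, mstd[1] and mstd[2] are forced to 0, and the output collapses to the mean.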
519 
520 static void extract_m8_i16(const uint8_t *srcp, const int stride, const int xdia, const int ydia, float *mstd, float *inputf)
521 {
522  int16_t *input = (int16_t *)inputf;
523  float scale;
524  int sum = 0, sumsq = 0;
525  int y, x;
526 
527  for (y = 0; y < ydia; y++) {
528  const uint8_t *srcpT = srcp + y * stride * 2;
529  for (x = 0; x < xdia; x++) {
530  sum += srcpT[x];
531  sumsq += srcpT[x] * srcpT[x];
532  input[x] = srcpT[x];
533  }
534  input += xdia;
535  }
536  scale = 1.0f / (float)(xdia * ydia);
537  mstd[0] = sum * scale;
538  mstd[1] = sumsq * scale - mstd[0] * mstd[0];
539  mstd[3] = 0.0f;
540  if (mstd[1] <= FLT_EPSILON)
541  mstd[1] = mstd[2] = 0.0f;
542  else {
543  mstd[1] = sqrt(mstd[1]);
544  mstd[2] = 1.0f / mstd[1];
545  }
546 }
547 
548 
549 static const float exp_lo = -80.0f;
550 static const float exp_hi = +80.0f;
551 
552 static void e2_m16(float *s, const int n)
553 {
554  int i;
555 
556  for (i = 0; i < n; i++)
557  s[i] = exp(av_clipf(s[i], exp_lo, exp_hi));
558 }
559 
560 const float min_weight_sum = 1e-10f;
561 
562 static void weighted_avg_elliott_mul5_m16(const float *w, const int n, float *mstd)
563 {
564  float vsum = 0.0f, wsum = 0.0f;
565  int i;
566 
567  for (i = 0; i < n; i++) {
568  vsum += w[i] * (w[n + i] / (1.0f + FFABS(w[n + i])));
569  wsum += w[i];
570  }
571  if (wsum > min_weight_sum)
572  mstd[3] += ((5.0f * vsum) / wsum) * mstd[1] + mstd[0];
573  else
574  mstd[3] += mstd[0];
575 }
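This is the final mixing step of the predictor: the first n values are the (already exponentiated) softmax weights, the second n are the corresponding value neurons, and the prediction accumulated into mstd[3] is mean + 5 * stddev * (sum_i w[i] * v[i] / (1 + |v[i]|)) / (sum_i w[i]); when the weight sum is below min_weight_sum only the local mean is used.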
576 
577 
578 static void evalfunc_1(NNEDIContext *s, FrameData *frame_data)
579 {
580  float *input = frame_data->input;
581  float *temp = frame_data->temp;
582  float **weights1 = s->weights1;
583  const int qual = s->qual;
584  const int asize = s->asize;
585  const int nns = s->nns;
586  const int xdia = s->xdia;
587  const int xdiad2m1 = (xdia / 2) - 1;
588  const int ydia = s->ydia;
589  const float scale = 1.0f / (float)qual;
590  int plane, y, x, i;
591 
592  for (plane = 0; plane < s->nb_planes; plane++) {
593  const uint8_t *srcp = (const uint8_t *)frame_data->paddedp[plane];
594  const int src_stride = frame_data->padded_stride[plane] / sizeof(uint8_t);
595 
596  const int width = frame_data->padded_width[plane];
597  const int height = frame_data->padded_height[plane];
598 
599  uint8_t *dstp = (uint8_t *)frame_data->dstp[plane];
600  const int dst_stride = frame_data->dst_stride[plane] / sizeof(uint8_t);
601 
602  const int ystart = frame_data->field[plane];
603  const int ystop = height - 12;
604  const uint8_t *srcpp;
605 
606  if (!(s->process_plane & (1 << plane)))
607  continue;
608 
609  srcp += (ystart + 6) * src_stride;
610  dstp += ystart * dst_stride - 32;
611  srcpp = srcp - (ydia - 1) * src_stride - xdiad2m1;
612 
613  for (y = ystart; y < ystop; y += 2) {
614  for (x = 32; x < width - 32; x++) {
615  float mstd[4];
616 
617  if (dstp[x] != 255)
618  continue;
619 
620  s->extract((const uint8_t *)(srcpp + x), src_stride, xdia, ydia, mstd, input);
621  for (i = 0; i < qual; i++) {
622  s->dot_prod(s, input, weights1[i], temp, nns * 2, asize, mstd + 2);
623  s->expfunc(temp, nns);
624  s->wae5(temp, nns, mstd);
625  }
626 
627  dstp[x] = FFMIN(FFMAX((int)(mstd[3] * scale + 0.5f), 0), s->max_value);
628  }
629  srcpp += src_stride * 2;
630  dstp += dst_stride * 2;
631  }
632  }
633 }
634 
635 #define NUM_NSIZE 7
636 #define NUM_NNS 5
637 
638 static int roundds(const double f)
639 {
640  if (f - floor(f) >= 0.5)
641  return FFMIN((int)ceil(f), 32767);
642  return FFMAX((int)floor(f), -32768);
643 }
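roundds() rounds halfway cases up (toward positive infinity), e.g. roundds(2.5) = 3 and roundds(-2.5) = -2, clamping each branch toward the int16_t limits; the 32767.0 / mval scaling applied in init() before every call keeps its argument within that range.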
644 
645 static void select_functions(NNEDIContext *s)
646 {
647  s->copy_pad = copy_pad;
648  s->evalfunc_0 = evalfunc_0;
649  s->evalfunc_1 = evalfunc_1;
650 
651  // evalfunc_0
652  s->process_line0 = process_line0;
653 
654  if (s->pscrn < 2) { // original prescreener
655  if (s->fapprox & 1) { // int16 dot products
656  s->readpixels = byte2word48;
657  s->compute_network0 = compute_network0_i16;
658  } else {
659  s->readpixels = pixel2float48;
660  s->compute_network0 = compute_network0;
661  }
662  } else { // new prescreener
663  // only int16 dot products
664  s->readpixels = byte2word64;
665  s->compute_network0 = compute_network0new;
666  }
667 
668  // evalfunc_1
669  s->wae5 = weighted_avg_elliott_mul5_m16;
670 
671  if (s->fapprox & 2) { // use int16 dot products
672  s->extract = extract_m8_i16;
673  s->dot_prod = dot_prods;
674  } else { // use float dot products
675  s->extract = extract_m8;
676  s->dot_prod = dot_prod;
677  }
678 
679  s->expfunc = e2_m16;
680 }
681 
682 static int modnpf(const int m, const int n)
683 {
684  if ((m % n) == 0)
685  return m;
686  return m + n - (m % n);
687 }
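modnpf() rounds m up to the next multiple of n and is used to pick an aligned padded stride: for a 720-pixel-wide plane padded_width is 784, and modnpf(784 + 10, 16) = 800, so each padded row is 16-byte aligned with at least min_pad (10) bytes of slack.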
688 
689 static int get_frame(AVFilterContext *ctx, int is_second)
690 {
691  NNEDIContext *s = ctx->priv;
692  AVFilterLink *outlink = ctx->outputs[0];
693  AVFrame *src = s->src;
694  FrameData *frame_data;
695  int effective_field = s->field;
696  size_t temp_size;
697  int field_n;
698  int plane;
699 
700  if (effective_field > 1)
701  effective_field -= 2;
702  else if (effective_field < 0)
703  effective_field += 2;
704 
705  if (s->field < 0 && src->interlaced_frame && src->top_field_first == 0)
706  effective_field = 0;
707  else if (s->field < 0 && src->interlaced_frame && src->top_field_first == 1)
708  effective_field = 1;
709  else
710  effective_field = !effective_field;
711 
712  if (s->field > 1 || s->field == -2) {
713  if (is_second) {
714  field_n = (effective_field == 0);
715  } else {
716  field_n = (effective_field == 1);
717  }
718  } else {
719  field_n = effective_field;
720  }
721 
722  s->dst = ff_get_video_buffer(outlink, outlink->w, outlink->h);
723  if (!s->dst)
724  return AVERROR(ENOMEM);
725  av_frame_copy_props(s->dst, src);
726  s->dst->interlaced_frame = 0;
727 
728  frame_data = &s->frame_data;
729 
730  for (plane = 0; plane < s->nb_planes; plane++) {
731  int dst_height = s->planeheight[plane];
732  int dst_width = s->linesize[plane];
733 
734  const int min_alignment = 16;
735  const int min_pad = 10;
736 
737  if (!(s->process_plane & (1 << plane))) {
738  av_image_copy_plane(s->dst->data[plane], s->dst->linesize[plane],
739  src->data[plane], src->linesize[plane],
740  s->linesize[plane],
741  s->planeheight[plane]);
742  continue;
743  }
744 
745  frame_data->padded_width[plane] = dst_width + 64;
746  frame_data->padded_height[plane] = dst_height + 12;
747  frame_data->padded_stride[plane] = modnpf(frame_data->padded_width[plane] + min_pad, min_alignment); // TODO: maybe min_pad is in pixels too?
748  if (!frame_data->paddedp[plane]) {
749  frame_data->paddedp[plane] = av_malloc_array(frame_data->padded_stride[plane], frame_data->padded_height[plane]);
750  if (!frame_data->paddedp[plane])
751  return AVERROR(ENOMEM);
752  }
753 
754  frame_data->dstp[plane] = s->dst->data[plane];
755  frame_data->dst_stride[plane] = s->dst->linesize[plane];
756 
757  if (!frame_data->lcount[plane]) {
758  frame_data->lcount[plane] = av_calloc(dst_height, sizeof(int32_t) * 16);
759  if (!frame_data->lcount[plane])
760  return AVERROR(ENOMEM);
761  } else {
762  memset(frame_data->lcount[plane], 0, dst_height * sizeof(int32_t) * 16);
763  }
764 
765  frame_data->field[plane] = field_n;
766  }
767 
768  if (!frame_data->input) {
769  frame_data->input = av_malloc(512 * sizeof(float));
770  if (!frame_data->input)
771  return AVERROR(ENOMEM);
772  }
773  // evalfunc_0 requires at least padded_width[0] bytes.
774  // evalfunc_1 requires at least 512 floats.
775  if (!frame_data->temp) {
776  temp_size = FFMAX(frame_data->padded_width[0], 512 * sizeof(float));
777  frame_data->temp = av_malloc(temp_size);
778  if (!frame_data->temp)
779  return AVERROR(ENOMEM);
780  }
781 
782  // Copy src to a padded "frame" in frame_data and mirror the edges.
783  s->copy_pad(src, frame_data, s, field_n);
784 
785  // Handles prescreening and the cubic interpolation.
786  s->evalfunc_0(s, frame_data);
787 
788  // The rest.
789  s->evalfunc_1(s, frame_data);
790 
791  return 0;
792 }
793 
794 static int filter_frame(AVFilterLink *inlink, AVFrame *src)
795 {
796  AVFilterContext *ctx = inlink->dst;
797  AVFilterLink *outlink = ctx->outputs[0];
798  NNEDIContext *s = ctx->priv;
799  int ret;
800 
801  if ((s->field > 1 ||
802  s->field == -2) && !s->second) {
803  goto second;
804  } else if (s->field > 1 ||
805  s->field == -2) {
806  AVFrame *dst;
807 
808  s->src = s->second;
809  ret = get_frame(ctx, 1);
810  if (ret < 0) {
811  av_frame_free(&s->dst);
812  av_frame_free(&s->src);
813  av_frame_free(&s->second);
814  return ret;
815  }
816  dst = s->dst;
817 
818  if (src->pts != AV_NOPTS_VALUE &&
819  dst->pts != AV_NOPTS_VALUE)
820  dst->pts += src->pts;
821  else
822  dst->pts = AV_NOPTS_VALUE;
823 
824  ret = ff_filter_frame(outlink, dst);
825  if (ret < 0)
826  return ret;
827  if (s->eof)
828  return 0;
829  s->cur_pts = s->second->pts;
830  av_frame_free(&s->second);
831 second:
832  if ((s->deint && src->interlaced_frame &&
833  !ctx->is_disabled) ||
834  (!s->deint && !ctx->is_disabled)) {
835  s->second = src;
836  }
837  }
838 
839  if ((s->deint && !src->interlaced_frame) || ctx->is_disabled) {
840  AVFrame *dst = av_frame_clone(src);
841  if (!dst) {
842  av_frame_free(&src);
843  av_frame_free(&s->second);
844  return AVERROR(ENOMEM);
845  }
846 
847  if (s->field > 1 || s->field == -2) {
848  av_frame_free(&s->second);
849  if ((s->deint && src->interlaced_frame) ||
850  (!s->deint))
851  s->second = src;
852  } else {
853  av_frame_free(&src);
854  }
855  if (dst->pts != AV_NOPTS_VALUE)
856  dst->pts *= 2;
857  return ff_filter_frame(outlink, dst);
858  }
859 
860  s->src = src;
861  ret = get_frame(ctx, 0);
862  if (ret < 0) {
863  av_frame_free(&s->dst);
864  av_frame_free(&s->src);
865  av_frame_free(&s->second);
866  return ret;
867  }
868 
869  if (src->pts != AV_NOPTS_VALUE)
870  s->dst->pts = src->pts * 2;
871  if (s->field <= 1 && s->field > -2) {
872  av_frame_free(&src);
873  s->src = NULL;
874  }
875 
876  return ff_filter_frame(outlink, s->dst);
877 }
878 
879 static int request_frame(AVFilterLink *link)
880 {
881  AVFilterContext *ctx = link->src;
882  NNEDIContext *s = ctx->priv;
883  int ret;
884 
885  if (s->eof)
886  return AVERROR_EOF;
887 
888  ret = ff_request_frame(ctx->inputs[0]);
889 
890  if (ret == AVERROR_EOF && s->second) {
891  AVFrame *next = av_frame_clone(s->second);
892 
893  if (!next)
894  return AVERROR(ENOMEM);
895 
896  next->pts = s->second->pts * 2 - s->cur_pts;
897  s->eof = 1;
898 
899  filter_frame(ctx->inputs[0], next);
900  } else if (ret < 0) {
901  return ret;
902  }
903 
904  return 0;
905 }
906 
907 static av_cold int init(AVFilterContext *ctx)
908 {
909  NNEDIContext *s = ctx->priv;
910  FILE *weights_file = NULL;
911  int64_t expected_size = 13574928;
912  int64_t weights_size;
913  float *bdata;
914  size_t bytes_read;
915  const int xdia_table[NUM_NSIZE] = { 8, 16, 32, 48, 8, 16, 32 };
916  const int ydia_table[NUM_NSIZE] = { 6, 6, 6, 6, 4, 4, 4 };
917  const int nns_table[NUM_NNS] = { 16, 32, 64, 128, 256 };
918  const int dims0 = 49 * 4 + 5 * 4 + 9 * 4;
919  const int dims0new = 4 * 65 + 4 * 5;
920  const int dims1 = nns_table[s->nnsparam] * 2 * (xdia_table[s->nsize] * ydia_table[s->nsize] + 1);
921  int dims1tsize = 0;
922  int dims1offset = 0;
923  int ret = 0, i, j, k;
924 
925  weights_file = fopen(s->weights_file, "rb");
926  if (!weights_file) {
927  av_log(ctx, AV_LOG_ERROR, "No weights file provided, aborting!\n");
928  return AVERROR(EINVAL);
929  }
930 
931  if (fseek(weights_file, 0, SEEK_END)) {
932  av_log(ctx, AV_LOG_ERROR, "Couldn't seek to the end of weights file.\n");
933  fclose(weights_file);
934  return AVERROR(EINVAL);
935  }
936 
937  weights_size = ftell(weights_file);
938 
939  if (weights_size == -1) {
940  fclose(weights_file);
941  av_log(ctx, AV_LOG_ERROR, "Couldn't get size of weights file.\n");
942  return AVERROR(EINVAL);
943  } else if (weights_size != expected_size) {
944  fclose(weights_file);
945  av_log(ctx, AV_LOG_ERROR, "Unexpected weights file size.\n");
946  return AVERROR(EINVAL);
947  }
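 /* Layout sketch of why expected_size is 13574928 bytes: the file holds
  * 3393732 single-precision floats, i.e. dims0 = 252 floats for the
  * original prescreener, 3 * dims0new = 840 floats for the three "new"
  * prescreener variants stored in the file (only the first is reachable
  * through the pscrn option here), and 2 * dims1tsize = 3392640 floats of
  * prediction weights covering both error types and every NUM_NNS x
  * NUM_NSIZE combination; (252 + 840 + 3392640) * 4 = 13574928. */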
948 
949  if (fseek(weights_file, 0, SEEK_SET)) {
950  fclose(weights_file);
951  av_log(ctx, AV_LOG_ERROR, "Couldn't seek to the start of weights file.\n");
952  return AVERROR(EINVAL);
953  }
954 
955  bdata = (float *)av_malloc(expected_size);
956  if (!bdata) {
957  fclose(weights_file);
958  return AVERROR(ENOMEM);
959  }
960 
961  bytes_read = fread(bdata, 1, expected_size, weights_file);
962 
963  if (bytes_read != (size_t)expected_size) {
964  fclose(weights_file);
965  ret = AVERROR_INVALIDDATA;
966  av_log(ctx, AV_LOG_ERROR, "Couldn't read weights file.\n");
967  goto fail;
968  }
969 
970  fclose(weights_file);
971 
972  for (j = 0; j < NUM_NNS; j++) {
973  for (i = 0; i < NUM_NSIZE; i++) {
974  if (i == s->nsize && j == s->nnsparam)
975  dims1offset = dims1tsize;
976  dims1tsize += nns_table[j] * 2 * (xdia_table[i] * ydia_table[i] + 1) * 2;
977  }
978  }
979 
980  s->weights0 = av_malloc_array(FFMAX(dims0, dims0new), sizeof(float));
981  if (!s->weights0) {
982  ret = AVERROR(ENOMEM);
983  goto fail;
984  }
985 
986  for (i = 0; i < 2; i++) {
987  s->weights1[i] = av_malloc_array(dims1, sizeof(float));
988  if (!s->weights1[i]) {
989  ret = AVERROR(ENOMEM);
990  goto fail;
991  }
992  }
993 
994  // Adjust prescreener weights
995  if (s->pscrn >= 2) {// using new prescreener
996  const float *bdw;
997  int16_t *ws;
998  float *wf;
999  double mean[4] = { 0.0, 0.0, 0.0, 0.0 };
1000  int *offt = av_calloc(4 * 64, sizeof(int));
1001 
1002  if (!offt) {
1003  ret = AVERROR(ENOMEM);
1004  goto fail;
1005  }
1006 
1007  for (j = 0; j < 4; j++)
1008  for (k = 0; k < 64; k++)
1009  offt[j * 64 + k] = ((k >> 3) << 5) + ((j & 3) << 3) + (k & 7);
1010 
1011  bdw = bdata + dims0 + dims0new * (s->pscrn - 2);
1012  ws = (int16_t *)s->weights0;
1013  wf = (float *)&ws[4 * 64];
1014  // Calculate mean weight of each first layer neuron
1015  for (j = 0; j < 4; j++) {
1016  double cmean = 0.0;
1017  for (k = 0; k < 64; k++)
1018  cmean += bdw[offt[j * 64 + k]];
1019  mean[j] = cmean / 64.0;
1020  }
1021  // Factor mean removal and 1.0/127.5 scaling
1022  // into first layer weights. scale to int16 range
1023  for (j = 0; j < 4; j++) {
1024  double scale, mval = 0.0;
1025 
1026  for (k = 0; k < 64; k++)
1027  mval = FFMAX(mval, FFABS((bdw[offt[j * 64 + k]] - mean[j]) / 127.5));
1028  scale = 32767.0 / mval;
1029  for (k = 0; k < 64; k++)
1030  ws[offt[j * 64 + k]] = roundds(((bdw[offt[j * 64 + k]] - mean[j]) / 127.5) * scale);
1031  wf[j] = (float)(mval / 32767.0);
1032  }
1033  memcpy(wf + 4, bdw + 4 * 64, (dims0new - 4 * 64) * sizeof(float));
1034  av_free(offt);
1035  } else { // using old prescreener
1036  double mean[4] = { 0.0, 0.0, 0.0, 0.0 };
1037  // Calculate mean weight of each first layer neuron
1038  for (j = 0; j < 4; j++) {
1039  double cmean = 0.0;
1040  for (k = 0; k < 48; k++)
1041  cmean += bdata[j * 48 + k];
1042  mean[j] = cmean / 48.0;
1043  }
1044  if (s->fapprox & 1) {// use int16 dot products in first layer
1045  int16_t *ws = (int16_t *)s->weights0;
1046  float *wf = (float *)&ws[4 * 48];
1047  // Factor mean removal and 1.0/127.5 scaling
1048  // into first layer weights. scale to int16 range
1049  for (j = 0; j < 4; j++) {
1050  double scale, mval = 0.0;
1051  for (k = 0; k < 48; k++)
1052  mval = FFMAX(mval, FFABS((bdata[j * 48 + k] - mean[j]) / 127.5));
1053  scale = 32767.0 / mval;
1054  for (k = 0; k < 48; k++)
1055  ws[j * 48 + k] = roundds(((bdata[j * 48 + k] - mean[j]) / 127.5) * scale);
1056  wf[j] = (float)(mval / 32767.0);
1057  }
1058  memcpy(wf + 4, bdata + 4 * 48, (dims0 - 4 * 48) * sizeof(float));
1059  } else {// use float dot products in first layer
1060  double half = (1 << 8) - 1;
1061 
1062  half /= 2;
1063 
1064  // Factor mean removal and 1.0/half scaling
1065  // into first layer weights.
1066  for (j = 0; j < 4; j++)
1067  for (k = 0; k < 48; k++)
1068  s->weights0[j * 48 + k] = (float)((bdata[j * 48 + k] - mean[j]) / half);
1069  memcpy(s->weights0 + 4 * 48, bdata + 4 * 48, (dims0 - 4 * 48) * sizeof(float));
1070  }
1071  }
1072 
1073  // Adjust prediction weights
1074  for (i = 0; i < 2; i++) {
1075  const float *bdataT = bdata + dims0 + dims0new * 3 + dims1tsize * s->etype + dims1offset + i * dims1;
1076  const int nnst = nns_table[s->nnsparam];
1077  const int asize = xdia_table[s->nsize] * ydia_table[s->nsize];
1078  const int boff = nnst * 2 * asize;
1079  double *mean = (double *)av_calloc(asize + 1 + nnst * 2, sizeof(double));
1080 
1081  if (!mean) {
1082  ret = AVERROR(ENOMEM);
1083  goto fail;
1084  }
1085 
1086  // Calculate mean weight of each neuron (ignore bias)
1087  for (j = 0; j < nnst * 2; j++) {
1088  double cmean = 0.0;
1089  for (k = 0; k < asize; k++)
1090  cmean += bdataT[j * asize + k];
1091  mean[asize + 1 + j] = cmean / (double)asize;
1092  }
1093  // Calculate mean softmax neuron
1094  for (j = 0; j < nnst; j++) {
1095  for (k = 0; k < asize; k++)
1096  mean[k] += bdataT[j * asize + k] - mean[asize + 1 + j];
1097  mean[asize] += bdataT[boff + j];
1098  }
1099  for (j = 0; j < asize + 1; j++)
1100  mean[j] /= (double)(nnst);
1101 
1102  if (s->fapprox & 2) { // use int16 dot products
1103  int16_t *ws = (int16_t *)s->weights1[i];
1104  float *wf = (float *)&ws[nnst * 2 * asize];
1105  // Factor mean removal into weights, remove global offset from
1106  // softmax neurons, and scale weights to int16 range.
1107  for (j = 0; j < nnst; j++) { // softmax neurons
1108  double scale, mval = 0.0;
1109  for (k = 0; k < asize; k++)
1110  mval = FFMAX(mval, FFABS(bdataT[j * asize + k] - mean[asize + 1 + j] - mean[k]));
1111  scale = 32767.0 / mval;
1112  for (k = 0; k < asize; k++)
1113  ws[j * asize + k] = roundds((bdataT[j * asize + k] - mean[asize + 1 + j] - mean[k]) * scale);
1114  wf[(j >> 2) * 8 + (j & 3)] = (float)(mval / 32767.0);
1115  wf[(j >> 2) * 8 + (j & 3) + 4] = (float)(bdataT[boff + j] - mean[asize]);
1116  }
1117  for (j = nnst; j < nnst * 2; j++) { // elliott neurons
1118  double scale, mval = 0.0;
1119  for (k = 0; k < asize; k++)
1120  mval = FFMAX(mval, FFABS(bdataT[j * asize + k] - mean[asize + 1 + j]));
1121  scale = 32767.0 / mval;
1122  for (k = 0; k < asize; k++)
1123  ws[j * asize + k] = roundds((bdataT[j * asize + k] - mean[asize + 1 + j]) * scale);
1124  wf[(j >> 2) * 8 + (j & 3)] = (float)(mval / 32767.0);
1125  wf[(j >> 2) * 8 + (j & 3) + 4] = bdataT[boff + j];
1126  }
1127  } else { // use float dot products
1128  // Factor mean removal into weights, and remove global
1129  // offset from softmax neurons.
1130  for (j = 0; j < nnst * 2; j++) {
1131  for (k = 0; k < asize; k++) {
1132  const double q = j < nnst ? mean[k] : 0.0;
1133  s->weights1[i][j * asize + k] = (float)(bdataT[j * asize + k] - mean[asize + 1 + j] - q);
1134  }
1135  s->weights1[i][boff + j] = (float)(bdataT[boff + j] - (j < nnst ? mean[asize] : 0.0));
1136  }
1137  }
1138  av_free(mean);
1139  }
1140 
1141  s->nns = nns_table[s->nnsparam];
1142  s->xdia = xdia_table[s->nsize];
1143  s->ydia = ydia_table[s->nsize];
1144  s->asize = xdia_table[s->nsize] * ydia_table[s->nsize];
1145 
1146  s->max_value = 65535 >> 8;
1147 
1148  select_functions(s);
1149 
1150  s->fdsp = avpriv_float_dsp_alloc(0);
1151  if (!s->fdsp)
1152  ret = AVERROR(ENOMEM);
1153 
1154 fail:
1155  av_free(bdata);
1156  return ret;
1157 }
1158 
1159 static av_cold void uninit(AVFilterContext *ctx)
1160 {
1161  NNEDIContext *s = ctx->priv;
1162  int i;
1163 
1164  av_freep(&s->weights0);
1165 
1166  for (i = 0; i < 2; i++)
1167  av_freep(&s->weights1[i]);
1168 
1169  for (i = 0; i < s->nb_planes; i++) {
1170  av_freep(&s->frame_data.paddedp[i]);
1171  av_freep(&s->frame_data.lcount[i]);
1172  }
1173 
1174  av_freep(&s->frame_data.input);
1175  av_freep(&s->frame_data.temp);
1176  av_freep(&s->fdsp);
1177  av_frame_free(&s->second);
1178 }
1179 
1180 static const AVFilterPad inputs[] = {
1181  {
1182  .name = "default",
1183  .type = AVMEDIA_TYPE_VIDEO,
1184  .filter_frame = filter_frame,
1185  .config_props = config_input,
1186  },
1187  { NULL }
1188 };
1189 
1190 static const AVFilterPad outputs[] = {
1191  {
1192  .name = "default",
1193  .type = AVMEDIA_TYPE_VIDEO,
1194  .config_props = config_output,
1195  .request_frame = request_frame,
1196  },
1197  { NULL }
1198 };
1199 
1200 AVFilter ff_vf_nnedi = {
1201  .name = "nnedi",
1202  .description = NULL_IF_CONFIG_SMALL("Apply neural network edge directed interpolation intra-only deinterlacer."),
1203  .priv_size = sizeof(NNEDIContext),
1204  .priv_class = &nnedi_class,
1205  .init = init,
1206  .uninit = uninit,
1207  .query_formats = query_formats,
1208  .inputs = inputs,
1209  .outputs = outputs,
1210  .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
1211 };