FFmpeg
vf_fspp.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
3  * Copyright (C) 2005 Nikolaj Poroshin <porosh3@psu.ru>
4  * Copyright (c) 2014 Arwa Arif <arwaarif1994@gmail.com>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License along
19  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
20  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21  */
22 
23 /**
24  * @file
25  * Fast Simple Post-processing filter
26  * This implementation is based on an algorithm described in
27  * "Aria Nosratinia Embedded Post-Processing for
28  * Enhancement of Compressed Images (1999)"
29  * (http://www.utdallas.edu/~aria/papers/vlsisp99.pdf)
30  * Further, with splitting (I)DCT into horizontal/vertical passes, one of
31  * them can be performed once per block, not per pixel. This allows for much
32  * higher speed.
33  *
34  * Originally written by Michael Niedermayer and Nikolaj for the MPlayer
35  * project, and ported by Arwa Arif for FFmpeg.
36  */
37 
38 #include "libavutil/imgutils.h"
39 #include "libavutil/mem_internal.h"
40 #include "libavutil/opt.h"
41 #include "libavutil/pixdesc.h"
42 #include "internal.h"
43 #include "qp_table.h"
44 #include "vf_fspp.h"
45 
46 #define OFFSET(x) offsetof(FSPPContext, x)
47 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
/* User-visible options of the filter. Each entry names the FSPPContext field
 * it stores into (via OFFSET), its type, default value and allowed range. */
48 static const AVOption fspp_options[] = {
 /* default 4, allowed range 4..MAX_LEVEL; stored in log2_count */
49  { "quality", "set quality", OFFSET(log2_count), AV_OPT_TYPE_INT, {.i64 = 4}, 4, MAX_LEVEL, FLAGS },
 /* default 0, allowed range 0..64; filter_frame() and filter() treat a
  * non-zero value as a constant quantizer that replaces the per-frame QP table */
50  { "qp", "force a constant quantizer parameter", OFFSET(qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 64, FLAGS },
 /* default 0, allowed range -15..32; added to 16 to form the threshold bias in filter_frame() */
51  { "strength", "set filter strength", OFFSET(strength), AV_OPT_TYPE_INT, {.i64 = 0}, -15, 32, FLAGS },
 /* boolean, default 0 */
52  { "use_bframe_qp", "use B-frames' QP", OFFSET(use_bframe_qp), AV_OPT_TYPE_BOOL,{.i64 = 0}, 0, 1, FLAGS },
 /* terminator entry */
53  { NULL }
54 };
55 
57 
/* 8x8 dither matrix with entries in the range 0..63.
 * store_slice_c() and store_slice2_c() pick row dither[y] for output row y and
 * add the entry for each column (shifted right by log2_scale) before the
 * final right shift to 8 bits. */
58 DECLARE_ALIGNED(32, static const uint8_t, dither)[8][8] = {
59  { 0, 48, 12, 60, 3, 51, 15, 63, },
60  { 32, 16, 44, 28, 35, 19, 47, 31, },
61  { 8, 56, 4, 52, 11, 59, 7, 55, },
62  { 40, 24, 36, 20, 43, 27, 39, 23, },
63  { 2, 50, 14, 62, 1, 49, 13, 61, },
64  { 34, 18, 46, 30, 33, 17, 45, 29, },
65  { 10, 58, 6, 54, 9, 57, 5, 53, },
66  { 42, 26, 38, 22, 41, 25, 37, 21, },
67 };
68 
/* Base threshold table with 64 entries (8 rows of 8).
 * filter_frame() scales every entry by (16 + strength) / 71.0, rounds it, and
 * packs the results into FSPPContext::threshold_mtx_noq. */
69 static const short custom_threshold[64] = {
70 // values (296) can't be too high
71 // -it causes too big quant dependence
72 // or maybe overflow(check), which results in some flashing
73  71, 296, 295, 237, 71, 40, 38, 19,
74  245, 193, 185, 121, 102, 73, 53, 27,
75  158, 129, 141, 107, 97, 73, 50, 26,
76  102, 116, 109, 98, 82, 66, 45, 23,
77  71, 94, 95, 81, 70, 56, 38, 20,
78  56, 77, 74, 66, 56, 44, 30, 15,
79  38, 53, 50, 45, 38, 30, 21, 11,
80  20, 27, 26, 23, 20, 15, 11, 5
81 };
82 
83 //This func reads from 1 slice, 1 and clears 0 & 1
84 static void store_slice_c(uint8_t *dst, int16_t *src,
85  ptrdiff_t dst_stride, ptrdiff_t src_stride,
86  ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
87 {
88  int y, x;
89 #define STORE(pos) \
90  temp = (src[x + pos] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
91  src[x + pos] = src[x + pos - 8 * src_stride] = 0; \
92  if (temp & 0x100) temp = ~(temp >> 31); \
93  dst[x + pos] = temp;
94 
95  for (y = 0; y < height; y++) {
96  const uint8_t *d = dither[y];
97  for (x = 0; x < width; x += 8) {
98  int temp;
99  STORE(0);
100  STORE(1);
101  STORE(2);
102  STORE(3);
103  STORE(4);
104  STORE(5);
105  STORE(6);
106  STORE(7);
107  }
108  src += src_stride;
109  dst += dst_stride;
110  }
111 }
112 
113 //This func reads from 2 slices, 0 & 2 and clears 2-nd
114 static void store_slice2_c(uint8_t *dst, int16_t *src,
115  ptrdiff_t dst_stride, ptrdiff_t src_stride,
116  ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
117 {
118  int y, x;
119 #define STORE2(pos) \
120  temp = (src[x + pos] + src[x + pos + 16 * src_stride] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
121  src[x + pos + 16 * src_stride] = 0; \
122  if (temp & 0x100) temp = ~(temp >> 31); \
123  dst[x + pos] = temp;
124 
125  for (y = 0; y < height; y++) {
126  const uint8_t *d = dither[y];
127  for (x = 0; x < width; x += 8) {
128  int temp;
129  STORE2(0);
130  STORE2(1);
131  STORE2(2);
132  STORE2(3);
133  STORE2(4);
134  STORE2(5);
135  STORE2(6);
136  STORE2(7);
137  }
138  src += src_stride;
139  dst += dst_stride;
140  }
141 }
142 
/*
 * Scale a 64-entry threshold table by the quantizer q.
 *
 * thr_adr_noq: input table (64 int16_t values), left unmodified.
 * thr_adr:     output table (64 int16_t values); element i receives
 *              q * thr_adr_noq[i] converted to int16_t.
 */
static void mul_thrmat_c(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
{
    const int16_t *in = thr_adr_noq;
    const int16_t *const end = thr_adr_noq + 64;
    int16_t *out = thr_adr;

    while (in < end)
        *out++ = q * *in++;
}
149 
/*
 * Post-process one image plane.
 *
 * The plane is copied into p->src with an 8-pixel mirrored border on all four
 * sides. It is then walked in steps of (6 - p->log2_count) rows: p->row_fdct
 * fills `block`, p->column_fidct thresholds it into `block3`, and p->row_idct
 * adds the result into the accumulator p->temp. Each time 8 further output
 * rows are complete they are written to dst through p->store_slice or
 * p->store_slice2.
 *
 * dst/src and their strides describe the output and input plane.
 * qp_store/qp_stride is the quantizer table; it is only read when p->qp is 0.
 * is_luma selects p->temp_stride as working stride (otherwise width + 16) and
 * disables the chroma-subsampling correction of the QP lookup shifts.
 *
 * Returns without writing anything when src or dst is NULL.
 */
150 static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src,
151  int dst_stride, int src_stride,
152  int width, int height,
153  uint8_t *qp_store, int qp_stride, int is_luma)
154 {
155  int x, x0, y, es, qy, t;
156 
157  const int stride = is_luma ? p->temp_stride : (width + 16);
158  const int step = 6 - p->log2_count;
 /* right shifts that turn a pixel x / y coordinate into a qp_store index */
159  const int qpsh = 4 - p->hsub * !is_luma;
160  const int qpsv = 4 - p->vsub * !is_luma;
161 
 /* one aligned buffer split in two halves: `block` receives the row_fdct
  * output, `block3` the column_fidct output that row_idct consumes */
162  DECLARE_ALIGNED(32, int32_t, block_align)[4 * 8 * BLOCKSZ + 4 * 8 * BLOCKSZ];
163  int16_t *block = (int16_t *)block_align;
164  int16_t *block3 = (int16_t *)(block_align + 4 * 8 * BLOCKSZ);
165 
 /* NOTE(review): this clears 4 * 8 * BLOCKSZ bytes, i.e. only the leading
  * quarter of block3's capacity. column_fidct_c() assigns (rather than
  * accumulates) its last two output rows, which presumably makes clearing
  * the remainder unnecessary -- confirm before changing the size. */
166  memset(block3, 0, 4 * 8 * BLOCKSZ);
167 
168  if (!src || !dst) return;
169 
 /* copy the plane into p->src at an offset of 8 rows and 8 columns and
  * mirror 8 pixels outwards at the left and right edge of every row */
170  for (y = 0; y < height; y++) {
171  int index = 8 + 8 * stride + y * stride;
172  memcpy(p->src + index, src + y * src_stride, width);
173  for (x = 0; x < 8; x++) {
174  p->src[index - x - 1] = p->src[index + x ];
175  p->src[index + width + x ] = p->src[index + width - x - 1];
176  }
177  }
178 
 /* mirror 8 complete rows above the first and below the last image row */
179  for (y = 0; y < 8; y++) {
180  memcpy(p->src + ( 7 - y ) * stride, p->src + ( y + 8 ) * stride, stride);
181  memcpy(p->src + (height + 8 + y) * stride, p->src + (height - y + 7) * stride, stride);
182  }
183  //FIXME (try edge emu)
184 
 /* zero rows 8..23 of the accumulator (width samples each, starting at column 8) */
185  for (y = 8; y < 24; y++)
186  memset(p->temp + 8 + y * stride, 0, width * sizeof(int16_t));
187 
188  for (y = step; y < height + 8; y += step) { //step= 1,2
189  const int y1 = y - 8 + step; //l5-7 l4-6;
190  qy = y - 4;
191 
 /* clamp the QP row to the picture, then turn it into a qp_store row offset */
192  if (qy > height - 1) qy = height - 1;
193  if (qy < 0) qy = 0;
194 
195  qy = (qy >> qpsv) * qp_stride;
 /* prime the first 8x8 coefficients of `block` for this row */
196  p->row_fdct(block, p->src + y * stride + 2 - (y&1), stride, 2);
197 
 /* main loop: handle 8 * (BLOCKSZ - 1) columns per iteration */
198  for (x0 = 0; x0 < width + 8 - 8 * (BLOCKSZ - 1); x0 += 8 * (BLOCKSZ - 1)) {
199  p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y&1), stride, 2 * (BLOCKSZ - 1));
200 
 /* constant QP: p->threshold_mtx is used as it is for the whole span */
201  if (p->qp)
202  p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + 0 * 8, block3 + 0 * 8, 8 * (BLOCKSZ - 1)); //yes, this is a HOTSPOT
203  else
 /* per-block QP: look the quantizer up for every 8 columns and
  * rebuild p->threshold_mtx only when it differs from p->prev_q */
204  for (x = 0; x < 8 * (BLOCKSZ - 1); x += 8) {
205  t = x + x0 - 2; //correct t=x+x0-2-(y&1), but its the same
206 
207  if (t < 0) t = 0; //t always < width-2
208 
209  t = qp_store[qy + (t >> qpsh)];
210  t = ff_norm_qscale(t, p->qscale_type);
211 
212  if (t != p->prev_q) p->prev_q = t, p->mul_thrmat((int16_t *)(&p->threshold_mtx_noq[0]), (int16_t *)(&p->threshold_mtx[0]), t);
213  p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + x * 8, block3 + x * 8, 8); //yes, this is a HOTSPOT
214  }
 /* accumulate the inverse transform into row (y & 15) of p->temp */
215  p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, 2 * (BLOCKSZ - 1));
 /* carry the trailing part of both buffers over to the front for the next span */
216  memmove(block, block + (BLOCKSZ - 1) * 64, 8 * 8 * sizeof(int16_t)); //cycling
217  memmove(block3, block3 + (BLOCKSZ - 1) * 64, 6 * 8 * sizeof(int16_t));
218  }
219 
 /* remaining columns of this row that did not fill a whole span */
220  es = width + 8 - x0; // 8, ...
221  if (es > 8)
222  p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y & 1), stride, (es - 4) >> 2);
223 
 /* note: the tail always uses the current p->threshold_mtx, without a new QP lookup */
224  p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block, block3, es&(~1));
225  if (es > 3)
226  p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, es >> 2);
227 
 /* whenever y1 reaches a non-zero multiple of 8, emit 8 output rows;
  * bit 3 of y1 selects between store_slice and store_slice2 */
228  if (!(y1 & 7) && y1) {
229  if (y1 & 8)
230  p->store_slice(dst + (y1 - 8) * dst_stride, p->temp + 8 + 8 * stride,
231  dst_stride, stride, width, 8, 5 - p->log2_count);
232  else
233  p->store_slice2(dst + (y1 - 8) * dst_stride, p->temp + 8 + 0 * stride,
234  dst_stride, stride, width, 8, 5 - p->log2_count);
235  }
236  }
237 
 /* emit the final y & 7 rows that did not complete a group of 8 */
238  if (y & 7) { // height % 8 != 0
239  if (y & 8)
240  p->store_slice(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 8 * stride,
241  dst_stride, stride, width, y&7, 5 - p->log2_count);
242  else
243  p->store_slice2(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 0 * stride,
244  dst_stride, stride, width, y&7, 5 - p->log2_count);
245  }
246 }
247 
/*
 * Combined column forward DCT, coefficient thresholding and column inverse DCT.
 *
 * thr_adr: threshold table; entry [k * 8 + column] is applied to coefficient k.
 * data:    input coefficients, read as 8 rows of DCTSIZE values per start position.
 * output:  result buffer; rows 0-5 are added to the existing contents while
 *          rows 6 and 7 overwrite them.
 * cnt:     loop budget, reduced by 2 per start position.
 *
 * Per start position 8 columns are processed; data and output then advance
 * by 16 int16_t in total (8 from the column loop plus the explicit skip of 8).
 */
248 static void column_fidct_c(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt)
249 {
250  int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
251  int_simd16_t tmp10, tmp11, tmp12, tmp13;
252  int_simd16_t z1,z2,z3,z4,z5, z10, z11, z12, z13;
253  int_simd16_t d0, d1, d2, d3, d4, d5, d6, d7;
254 
255  int16_t *dataptr;
256  int16_t *wsptr;
257  int16_t *threshold;
258  int ctr;
259 
260  dataptr = data;
261  wsptr = output;
262 
263  for (; cnt > 0; cnt -= 2) { //start positions
 /* restart at the first threshold column for every start position */
264  threshold = (int16_t *)thr_adr;//threshold_mtx
265  for (ctr = DCTSIZE; ctr > 0; ctr--) {
266  // Process columns from input, add to output.
 /* butterfly stage: sums and differences of mirrored rows 0/7, 1/6, 2/5, 3/4 */
267  tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7];
268  tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7];
269 
270  tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6];
271  tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6];
272 
273  tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5];
274  tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5];
275 
276  tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4];
277  tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4];
278 
279  // Even part of FDCT
280 
281  tmp10 = tmp0 + tmp3;
282  tmp13 = tmp0 - tmp3;
283  tmp11 = tmp1 + tmp2;
284  tmp12 = tmp1 - tmp2;
285 
286  d0 = tmp10 + tmp11;
287  d4 = tmp10 - tmp11;
288 
289  z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
290  d2 = tmp13 + z1;
291  d6 = tmp13 - z1;
292 
293  // Even part of IDCT
294 
 /* threshold the even coefficients d0, d2, d4, d6 into tmp0..tmp3 */
295  THRESHOLD(tmp0, d0, threshold[0 * 8]);
296  THRESHOLD(tmp1, d2, threshold[2 * 8]);
297  THRESHOLD(tmp2, d4, threshold[4 * 8]);
298  THRESHOLD(tmp3, d6, threshold[6 * 8]);
 /* rounding offset applied before the >> 2 below */
299  tmp0 += 2;
300  tmp10 = (tmp0 + tmp2) >> 2;
301  tmp11 = (tmp0 - tmp2) >> 2;
302 
303  tmp13 = (tmp1 + tmp3) >>2; //+2 ! (psnr decides)
304  tmp12 = MULTIPLY16H((tmp1 - tmp3), FIX_1_414213562_A) - tmp13; //<<2
305 
306  tmp0 = tmp10 + tmp13; //->temps
307  tmp3 = tmp10 - tmp13; //->temps
308  tmp1 = tmp11 + tmp12; //->temps
309  tmp2 = tmp11 - tmp12; //->temps
310 
311  // Odd part of FDCT
312 
313  tmp10 = tmp4 + tmp5;
314  tmp11 = tmp5 + tmp6;
315  tmp12 = tmp6 + tmp7;
316 
317  z5 = MULTIPLY16H((tmp10 - tmp12) << 2, FIX_0_382683433);
318  z2 = MULTIPLY16H(tmp10 << 2, FIX_0_541196100) + z5;
319  z4 = MULTIPLY16H(tmp12 << 2, FIX_1_306562965) + z5;
320  z3 = MULTIPLY16H(tmp11 << 2, FIX_0_707106781);
321 
322  z11 = tmp7 + z3;
323  z13 = tmp7 - z3;
324 
325  d5 = z13 + z2;
326  d3 = z13 - z2;
327  d1 = z11 + z4;
328  d7 = z11 - z4;
329 
330  // Odd part of IDCT
331 
 /* threshold the odd coefficients d1, d3, d5, d7 into tmp4..tmp7 */
332  THRESHOLD(tmp4, d1, threshold[1 * 8]);
333  THRESHOLD(tmp5, d3, threshold[3 * 8]);
334  THRESHOLD(tmp6, d5, threshold[5 * 8]);
335  THRESHOLD(tmp7, d7, threshold[7 * 8]);
336 
337  //Simd version uses here a shortcut for the tmp5,tmp6,tmp7 == 0
338  z13 = tmp6 + tmp5;
339  z10 = (tmp6 - tmp5) << 1;
340  z11 = tmp4 + tmp7;
341  z12 = (tmp4 - tmp7) << 1;
342 
343  tmp7 = (z11 + z13) >> 2; //+2 !
344  tmp11 = MULTIPLY16H((z11 - z13) << 1, FIX_1_414213562);
345  z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
346  tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
347  tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - !!
348 
349  tmp6 = tmp12 - tmp7;
350  tmp5 = tmp11 - tmp6;
351  tmp4 = tmp10 + tmp5;
352 
 /* rows 0-5 accumulate into the output, rows 6-7 are plain stores;
  * consecutive start positions overlap because wsptr only advances by
  * 16 samples while 64 are touched */
353  wsptr[DCTSIZE * 0] += (tmp0 + tmp7);
354  wsptr[DCTSIZE * 1] += (tmp1 + tmp6);
355  wsptr[DCTSIZE * 2] += (tmp2 + tmp5);
356  wsptr[DCTSIZE * 3] += (tmp3 - tmp4);
357  wsptr[DCTSIZE * 4] += (tmp3 + tmp4);
358  wsptr[DCTSIZE * 5] += (tmp2 - tmp5);
359  wsptr[DCTSIZE * 6] = (tmp1 - tmp6);
360  wsptr[DCTSIZE * 7] = (tmp0 - tmp7);
361  //
362  dataptr++; //next column
363  wsptr++;
364  threshold++;
365  }
366  dataptr += 8; //skip each second start pos
367  wsptr += 8;
368  }
369 }
370 
/*
 * Inverse transform of coefficient rows, accumulated into the output plane.
 *
 * workspace:     input, read as consecutive groups of DCTSIZE coefficients.
 * output_adr:    accumulator; each input group adds 8 descaled samples down
 *                one column (rows 0..7, output_stride apart).
 * output_stride: distance between output rows, in int16_t elements.
 * cnt:           number of 4-column units; cnt * 4 groups are processed.
 *
 * Within a group the even coefficients sit at indices 2, 3, 0, 1 and the odd
 * ones at 4..7, which is the order row_fdct_c() writes them in.
 */
371 static void row_idct_c(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
372 {
373  int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
374  int_simd16_t tmp10, tmp11, tmp12, tmp13;
375  int_simd16_t z5, z10, z11, z12, z13;
376  int16_t *outptr;
377  int16_t *wsptr;
378 
379  cnt *= 4;
380  wsptr = workspace;
381  outptr = output_adr;
382  for (; cnt > 0; cnt--) {
383  // Even part
384  //Simd version reads 4x4 block and transposes it
385  tmp10 = wsptr[2] + wsptr[3];
386  tmp11 = wsptr[2] - wsptr[3];
387 
388  tmp13 = wsptr[0] + wsptr[1];
389  tmp12 = (MULTIPLY16H(wsptr[0] - wsptr[1], FIX_1_414213562_A) << 2) - tmp13;//this shift order to avoid overflow
390 
391  tmp0 = tmp10 + tmp13; //->temps
392  tmp3 = tmp10 - tmp13; //->temps
393  tmp1 = tmp11 + tmp12;
394  tmp2 = tmp11 - tmp12;
395 
396  // Odd part
397  //Also transpose, with previous:
398  // ---- ---- ||||
399  // ---- ---- idct ||||
400  // ---- ---- ---> ||||
401  // ---- ---- ||||
402  z13 = wsptr[4] + wsptr[5];
403  z10 = wsptr[4] - wsptr[5];
404  z11 = wsptr[6] + wsptr[7];
405  z12 = wsptr[6] - wsptr[7];
406 
407  tmp7 = z11 + z13;
408  tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562);
409 
410  z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
411  tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
412  tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - FIX_
413 
414  tmp6 = (tmp12 << 3) - tmp7;
415  tmp5 = (tmp11 << 3) - tmp6;
416  tmp4 = (tmp10 << 3) + tmp5;
417 
418  // Final output stage: descale and write column
 /* all eight rows are accumulated (+=), unlike column_fidct_c() */
419  outptr[0 * output_stride] += DESCALE(tmp0 + tmp7, 3);
420  outptr[1 * output_stride] += DESCALE(tmp1 + tmp6, 3);
421  outptr[2 * output_stride] += DESCALE(tmp2 + tmp5, 3);
422  outptr[3 * output_stride] += DESCALE(tmp3 - tmp4, 3);
423  outptr[4 * output_stride] += DESCALE(tmp3 + tmp4, 3);
424  outptr[5 * output_stride] += DESCALE(tmp2 - tmp5, 3);
425  outptr[6 * output_stride] += DESCALE(tmp1 - tmp6, 3); //no += ?
426  outptr[7 * output_stride] += DESCALE(tmp0 - tmp7, 3); //no += ?
 /* next output column */
427  outptr++;
428 
429  wsptr += DCTSIZE; // advance pointer to next row
430  }
431 }
432 
/*
 * Forward transform of pixel columns.
 *
 * data:      output; DCTSIZE coefficients are written per processed column.
 * pixels:    input; for every column 8 pixels spaced line_size apart are read.
 * line_size: distance between the 8 input pixels, in bytes.
 * cnt:       number of 4-column units; cnt * 4 columns are processed.
 *
 * The coefficients are stored in a permuted order: the even part goes to
 * indices 2, 3, 0, 1 and the odd part to 4..7.
 */
433 static void row_fdct_c(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
434 {
435  int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
436  int_simd16_t tmp10, tmp11, tmp12, tmp13;
437  int_simd16_t z1, z2, z3, z4, z5, z11, z13;
438  int16_t *dataptr;
439 
440  cnt *= 4;
441  // Pass 1: process rows.
442 
443  dataptr = data;
444  for (; cnt > 0; cnt--) {
 /* butterfly stage: sums and differences of mirrored pixels 0/7, 1/6, 2/5, 3/4 */
445  tmp0 = pixels[line_size * 0] + pixels[line_size * 7];
446  tmp7 = pixels[line_size * 0] - pixels[line_size * 7];
447  tmp1 = pixels[line_size * 1] + pixels[line_size * 6];
448  tmp6 = pixels[line_size * 1] - pixels[line_size * 6];
449  tmp2 = pixels[line_size * 2] + pixels[line_size * 5];
450  tmp5 = pixels[line_size * 2] - pixels[line_size * 5];
451  tmp3 = pixels[line_size * 3] + pixels[line_size * 4];
452  tmp4 = pixels[line_size * 3] - pixels[line_size * 4];
453 
454  // Even part
455 
456  tmp10 = tmp0 + tmp3;
457  tmp13 = tmp0 - tmp3;
458  tmp11 = tmp1 + tmp2;
459  tmp12 = tmp1 - tmp2;
460  //Even columns are written first, this leads to different order of columns
461  //in column_fidct(), but they are processed independently, so all ok.
462  //Later, row_idct() reads the columns in the same order.
463  dataptr[2] = tmp10 + tmp11;
464  dataptr[3] = tmp10 - tmp11;
465 
466  z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
467  dataptr[0] = tmp13 + z1;
468  dataptr[1] = tmp13 - z1;
469 
470  // Odd part
471 
 /* pre-scale by 4 before the MULTIPLY16H calls below */
472  tmp10 = (tmp4 + tmp5) << 2;
473  tmp11 = (tmp5 + tmp6) << 2;
474  tmp12 = (tmp6 + tmp7) << 2;
475 
476  z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433);
477  z2 = MULTIPLY16H(tmp10, FIX_0_541196100) + z5;
478  z4 = MULTIPLY16H(tmp12, FIX_1_306562965) + z5;
479  z3 = MULTIPLY16H(tmp11, FIX_0_707106781);
480 
481  z11 = tmp7 + z3;
482  z13 = tmp7 - z3;
483 
484  dataptr[4] = z13 + z2;
485  dataptr[5] = z13 - z2;
486  dataptr[6] = z11 + z4;
487  dataptr[7] = z11 - z4;
488 
489  pixels++; // advance pointer to next column
490  dataptr += DCTSIZE;
491  }
492 }
493 
495 {
496  static const enum AVPixelFormat pix_fmts[] = {
504  };
505 
507  if (!fmts_list)
508  return AVERROR(ENOMEM);
509  return ff_set_common_formats(ctx, fmts_list);
510 }
511 
513 {
514  AVFilterContext *ctx = inlink->dst;
515  FSPPContext *fspp = ctx->priv;
516  const int h = FFALIGN(inlink->h + 16, 16);
518 
519  fspp->hsub = desc->log2_chroma_w;
520  fspp->vsub = desc->log2_chroma_h;
521 
522  fspp->temp_stride = FFALIGN(inlink->w + 16, 16);
523  fspp->temp = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->temp));
524  fspp->src = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->src));
525 
526  if (!fspp->temp || !fspp->src)
527  return AVERROR(ENOMEM);
528 
529  fspp->store_slice = store_slice_c;
531  fspp->mul_thrmat = mul_thrmat_c;
533  fspp->row_idct = row_idct_c;
534  fspp->row_fdct = row_fdct_c;
535 
536  if (ARCH_X86)
537  ff_fspp_init_x86(fspp);
538 
539  return 0;
540 }
541 
543 {
544  AVFilterContext *ctx = inlink->dst;
545  FSPPContext *fspp = ctx->priv;
546  AVFilterLink *outlink = ctx->outputs[0];
547  AVFrame *out = in;
548 
549  int qp_stride = 0;
550  int8_t *qp_table = NULL;
551  int i, bias;
552  int ret = 0;
553  int custom_threshold_m[64];
554 
555  bias = (1 << 4) + fspp->strength;
556 
557  for (i = 0; i < 64; i++) //FIXME: tune custom_threshold[] and remove this !
558  custom_threshold_m[i] = (int)(custom_threshold[i] * (bias / 71.0) + 0.5);
559 
560  for (i = 0; i < 8; i++) {
561  fspp->threshold_mtx_noq[2 * i] = (uint64_t)custom_threshold_m[i * 8 + 2]
562  |(((uint64_t)custom_threshold_m[i * 8 + 6]) << 16)
563  |(((uint64_t)custom_threshold_m[i * 8 + 0]) << 32)
564  |(((uint64_t)custom_threshold_m[i * 8 + 4]) << 48);
565 
566  fspp->threshold_mtx_noq[2 * i + 1] = (uint64_t)custom_threshold_m[i * 8 + 5]
567  |(((uint64_t)custom_threshold_m[i * 8 + 3]) << 16)
568  |(((uint64_t)custom_threshold_m[i * 8 + 1]) << 32)
569  |(((uint64_t)custom_threshold_m[i * 8 + 7]) << 48);
570  }
571 
572  if (fspp->qp)
573  fspp->prev_q = fspp->qp, fspp->mul_thrmat((int16_t *)(&fspp->threshold_mtx_noq[0]), (int16_t *)(&fspp->threshold_mtx[0]), fspp->qp);
574 
575  /* if we are not in a constant user quantizer mode and we don't want to use
576  * the quantizers from the B-frames (B-frames often have a higher QP), we
577  * need to save the qp table from the last non B-frame; this is what the
578  * following code block does */
579  if (!fspp->qp && (fspp->use_bframe_qp || in->pict_type != AV_PICTURE_TYPE_B)) {
580  ret = ff_qp_table_extract(in, &qp_table, &qp_stride, NULL, &fspp->qscale_type);
581  if (ret < 0) {
582  av_frame_free(&in);
583  return ret;
584  }
585 
586  if (!fspp->use_bframe_qp && in->pict_type != AV_PICTURE_TYPE_B) {
587  av_freep(&fspp->non_b_qp_table);
588  fspp->non_b_qp_table = qp_table;
589  fspp->non_b_qp_stride = qp_stride;
590  }
591  }
592 
593  if (fspp->log2_count && !ctx->is_disabled) {
594  if (!fspp->use_bframe_qp && fspp->non_b_qp_table) {
595  qp_table = fspp->non_b_qp_table;
596  qp_stride = fspp->non_b_qp_stride;
597  }
598 
599  if (qp_table || fspp->qp) {
600  const int cw = AV_CEIL_RSHIFT(inlink->w, fspp->hsub);
601  const int ch = AV_CEIL_RSHIFT(inlink->h, fspp->vsub);
602 
603  /* get a new frame if in-place is not possible or if the dimensions
604  * are not multiple of 8 */
605  if (!av_frame_is_writable(in) || (inlink->w & 7) || (inlink->h & 7)) {
606  const int aligned_w = FFALIGN(inlink->w, 8);
607  const int aligned_h = FFALIGN(inlink->h, 8);
608 
609  out = ff_get_video_buffer(outlink, aligned_w, aligned_h);
610  if (!out) {
611  av_frame_free(&in);
612  ret = AVERROR(ENOMEM);
613  goto finish;
614  }
616  out->width = in->width;
617  out->height = in->height;
618  }
619 
620  filter(fspp, out->data[0], in->data[0], out->linesize[0], in->linesize[0],
621  inlink->w, inlink->h, qp_table, qp_stride, 1);
622  filter(fspp, out->data[1], in->data[1], out->linesize[1], in->linesize[1],
623  cw, ch, qp_table, qp_stride, 0);
624  filter(fspp, out->data[2], in->data[2], out->linesize[2], in->linesize[2],
625  cw, ch, qp_table, qp_stride, 0);
626  emms_c();
627  }
628  }
629 
630  if (in != out) {
631  if (in->data[3])
632  av_image_copy_plane(out->data[3], out->linesize[3],
633  in ->data[3], in ->linesize[3],
634  inlink->w, inlink->h);
635  av_frame_free(&in);
636  }
637  ret = ff_filter_frame(outlink, out);
638 finish:
639  if (qp_table != fspp->non_b_qp_table)
640  av_freep(&qp_table);
641  return ret;
642 }
643 
645 {
646  FSPPContext *fspp = ctx->priv;
647  av_freep(&fspp->temp);
648  av_freep(&fspp->src);
649  av_freep(&fspp->non_b_qp_table);
650 }
651 
/* Input pads of the filter: a single video pad named "default" that uses
 * config_input as its config_props callback and filter_frame as its
 * filter_frame callback. The list ends with a NULL entry. */
652 static const AVFilterPad fspp_inputs[] = {
653  {
654  .name = "default",
655  .type = AVMEDIA_TYPE_VIDEO,
656  .config_props = config_input,
657  .filter_frame = filter_frame,
658  },
659  { NULL }
660 };
661 
/* Output pads of the filter: a single video pad named "default" with no
 * callbacks set. The list ends with a NULL entry. */
662 static const AVFilterPad fspp_outputs[] = {
663  {
664  .name = "default",
665  .type = AVMEDIA_TYPE_VIDEO,
666  },
667  { NULL }
668 };
669 
671  .name = "fspp",
672  .description = NULL_IF_CONFIG_SMALL("Apply Fast Simple Post-processing filter."),
673  .priv_size = sizeof(FSPPContext),
674  .uninit = uninit,
676  .inputs = fspp_inputs,
678  .priv_class = &fspp_class,
680 };
ff_get_video_buffer
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
Definition: video.c:97
MULTIPLY16H
#define MULTIPLY16H(x, k)
Definition: vf_fspp.h:36
stride
int stride
Definition: mace.c:144
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
FIX_0_707106781
#define FIX_0_707106781
Definition: jfdctfst.c:118
FIX_0_541196100
#define FIX_0_541196100
Definition: jfdctfst.c:117
FSPPContext::column_fidct
void(* column_fidct)(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt)
Definition: vf_fspp.h:82
store_slice2_c
static void store_slice2_c(uint8_t *dst, int16_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
Definition: vf_fspp.c:114
vf_fspp.h
qp_table.h
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
FSPPContext::hsub
int hsub
Definition: vf_fspp.h:60
STORE
#define STORE(pos)
ff_make_format_list
AVFilterFormats * ff_make_format_list(const int *fmts)
Create a list of supported formats.
Definition: formats.c:286
mem_internal.h
out
FILE * out
Definition: movenc.c:54
FSPPContext::threshold_mtx_noq
uint64_t threshold_mtx_noq[8 *2]
Definition: vf_fspp.h:55
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:978
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2540
output
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
Definition: filter_design.txt:225
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
FLAGS
#define FLAGS
Definition: vf_fspp.c:47
FSPPContext::store_slice
void(* store_slice)(uint8_t *dst, int16_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
Definition: vf_fspp.h:72
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:112
FSPPContext::vsub
int vsub
Definition: vf_fspp.h:61
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:303
pixdesc.h
step
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about which is also called distortion Distortion can be quantified by almost any quality measurement one chooses the sum of squared differences is used but more complex methods that consider psychovisual effects can be used as well It makes no difference in this discussion First step
Definition: rate_distortion.txt:58
index
fg index
Definition: ffmpeg_filter.c:168
AVFrame::width
int width
Definition: frame.h:361
AVOption
AVOption.
Definition: opt.h:247
data
const char data[16]
Definition: mxf.c:143
AV_PIX_FMT_YUV440P
@ AV_PIX_FMT_YUV440P
planar YUV 4:4:0 (1 Cr & Cb sample per 1x2 Y samples)
Definition: pixfmt.h:99
FSPPContext::src
uint8_t * src
Definition: vf_fspp.h:66
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:149
FIX_1_082392200
#define FIX_1_082392200
Definition: 4xm.c:158
FSPPContext::row_idct
void(* row_idct)(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
Definition: vf_fspp.h:85
AVFrame::data
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:317
av_image_copy_plane
void av_image_copy_plane(uint8_t *dst, int dst_linesize, const uint8_t *src, int src_linesize, int bytewidth, int height)
Copy image plane from src to dst.
Definition: imgutils.c:374
FIX_2_613125930
#define FIX_2_613125930
Definition: 4xm.c:161
AVFilterFormats
A list of supported formats for one end of a filter link.
Definition: formats.h:64
row_fdct_c
static void row_fdct_c(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
Definition: vf_fspp.c:433
finish
static void finish(void)
Definition: movenc.c:342
BLOCKSZ
#define BLOCKSZ
Definition: vf_fspp.h:28
FIX_0_382683433
#define FIX_0_382683433
Definition: jfdctfst.c:116
fspp_inputs
static const AVFilterPad fspp_inputs[]
Definition: vf_fspp.c:652
custom_threshold
static const short custom_threshold[64]
Definition: vf_fspp.c:69
mul_thrmat_c
static void mul_thrmat_c(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
Definition: vf_fspp.c:143
AVFilterPad
A filter pad used for either input or output.
Definition: internal.h:54
av_cold
#define av_cold
Definition: attributes.h:90
inputs
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several inputs
Definition: filter_design.txt:243
ff_set_common_formats
int ff_set_common_formats(AVFilterContext *ctx, AVFilterFormats *formats)
A helper for query_formats() which sets all links to the same list of formats.
Definition: formats.c:580
ff_vf_fspp
const AVFilter ff_vf_fspp
Definition: vf_fspp.c:670
column_fidct_c
static void column_fidct_c(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt)
Definition: vf_fspp.c:248
AV_PIX_FMT_YUVJ422P
@ AV_PIX_FMT_YUVJ422P
planar YUV 4:2:2, 16bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV422P and setting col...
Definition: pixfmt.h:79
width
#define width
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:58
FSPPContext::row_fdct
void(* row_fdct)(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
Definition: vf_fspp.h:88
DCTSIZE
#define DCTSIZE
Definition: jfdctfst.c:74
FSPPContext::non_b_qp_table
int8_t * non_b_qp_table
Definition: vf_fspp.h:68
outputs
static const AVFilterPad outputs[]
Definition: af_acontrast.c:203
pix_fmts
static enum AVPixelFormat pix_fmts[]
Definition: libkvazaar.c:290
FSPPContext::non_b_qp_stride
int non_b_qp_stride
Definition: vf_fspp.h:69
ctx
AVFormatContext * ctx
Definition: movenc.c:48
config_input
static int config_input(AVFilterLink *inlink)
Definition: vf_fspp.c:512
store_slice_c
static void store_slice_c(uint8_t *dst, int16_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
Definition: vf_fspp.c:84
AV_PIX_FMT_YUV420P
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:66
FSPPContext::log2_count
int log2_count
Definition: vf_fspp.h:58
AV_PIX_FMT_YUVJ444P
@ AV_PIX_FMT_YUVJ444P
planar YUV 4:4:4, 24bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV444P and setting col...
Definition: pixfmt.h:80
NULL
#define NULL
Definition: coverity.c:32
av_frame_copy_props
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:537
FSPPContext::qp
int qp
Definition: vf_fspp.h:63
AV_PIX_FMT_YUVJ420P
@ AV_PIX_FMT_YUVJ420P
planar YUV 4:2:0, 12bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV420P and setting col...
Definition: pixfmt.h:78
src
#define src
Definition: vp8dsp.c:255
FIX_1_306562965
#define FIX_1_306562965
Definition: jfdctfst.c:119
STORE2
#define STORE2(pos)
AV_PIX_FMT_GRAY8
@ AV_PIX_FMT_GRAY8
Y , 8bpp.
Definition: pixfmt.h:74
query_formats
static int query_formats(AVFilterContext *ctx)
Definition: vf_fspp.c:494
for
for(j=16;j >0;--j)
Definition: h264pred_template.c:469
MAX_LEVEL
#define MAX_LEVEL
Definition: rl.h:36
AVFrame::pict_type
enum AVPictureType pict_type
Picture type of the frame.
Definition: frame.h:386
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:116
FSPPContext::strength
int strength
Definition: vf_fspp.h:59
uninit
static av_cold void uninit(AVFilterContext *ctx)
Definition: vf_fspp.c:644
av_frame_is_writable
int av_frame_is_writable(AVFrame *frame)
Check if the frame data is writable.
Definition: frame.c:473
ff_norm_qscale
static int ff_norm_qscale(int qscale, int type)
Normalize the qscale factor FIXME the H264 qscale is a log based scale, mpeg1/2 is not,...
Definition: internal.h:351
OFFSET
#define OFFSET(x)
Definition: vf_fspp.c:46
height
#define height
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
fspp_options
static const AVOption fspp_options[]
Definition: vf_fspp.c:48
row_idct_c
static void row_idct_c(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
Definition: vf_fspp.c:371
FSPPContext::threshold_mtx
uint64_t threshold_mtx[8 *2]
Definition: vf_fspp.h:56
FIX_1_847759065
#define FIX_1_847759065
Definition: 4xm.c:160
internal.h
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem.h:116
FSPPContext::temp_stride
int temp_stride
Definition: vf_fspp.h:62
i
int i
Definition: input.c:406
FSPPContext::use_bframe_qp
int use_bframe_qp
Definition: vf_fspp.h:70
av_malloc_array
#define av_malloc_array(a, b)
Definition: tableprint_vlc.h:32
AV_PIX_FMT_YUVJ440P
@ AV_PIX_FMT_YUVJ440P
planar YUV 4:4:0 full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV440P and setting color_range
Definition: pixfmt.h:100
FIX_1_414213562
#define FIX_1_414213562
Definition: 4xm.c:159
AVFilterPad::name
const char * name
Pad name.
Definition: internal.h:60
DESCALE
#define DESCALE(x, n)
Definition: jfdctfst.c:135
AVFilter
Filter definition.
Definition: avfilter.h:145
ret
ret
Definition: filter_design.txt:187
fspp_outputs
static const AVFilterPad fspp_outputs[]
Definition: vf_fspp.c:662
AVFrame::height
int height
Definition: frame.h:361
FSPPContext
Definition: vf_fspp.h:53
ff_fspp_init_x86
void ff_fspp_init_x86(FSPPContext *fspp)
Definition: vf_fspp_init.c:37
FIX_1_414213562_A
static const int16_t FIX_1_414213562_A
Definition: vf_fspp.h:47
AV_PICTURE_TYPE_B
@ AV_PICTURE_TYPE_B
Bi-dir predicted.
Definition: avutil.h:276
AV_PIX_FMT_NONE
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:65
AV_OPT_TYPE_INT
@ AV_OPT_TYPE_INT
Definition: opt.h:224
filter
static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src, int dst_stride, int src_stride, int width, int height, uint8_t *qp_store, int qp_stride, int is_luma)
Definition: vf_fspp.c:150
FSPPContext::mul_thrmat
void(* mul_thrmat)(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
Definition: vf_fspp.h:80
temp
else temp
Definition: vf_mcdeint.c:259
AV_PIX_FMT_YUV444P
@ AV_PIX_FMT_YUV444P
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:71
AVFilterContext
An instance of a filter.
Definition: avfilter.h:333
FSPPContext::store_slice2
void(* store_slice2)(uint8_t *dst, int16_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
Definition: vf_fspp.h:76
AV_PIX_FMT_GBRP
@ AV_PIX_FMT_GBRP
planar GBR 4:4:4 24bpp
Definition: pixfmt.h:158
desc
const char * desc
Definition: libsvtav1.c:79
AVMEDIA_TYPE_VIDEO
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
AV_PIX_FMT_YUV422P
@ AV_PIX_FMT_YUV422P
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:70
ff_qp_table_extract
int ff_qp_table_extract(AVFrame *frame, int8_t **table, int *table_w, int *table_h, int *qscale_type)
Extract a libpostproc-compatible QP table - an 8-bit QP value per 16x16 macroblock,...
Definition: qp_table.c:30
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:69
FSPPContext::temp
int16_t * temp
Definition: vf_fspp.h:67
filter_frame
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
Definition: vf_fspp.c:542
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:48
AV_OPT_TYPE_BOOL
@ AV_OPT_TYPE_BOOL
Definition: opt.h:241
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:35
AV_PIX_FMT_YUV411P
@ AV_PIX_FMT_YUV411P
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
Definition: pixfmt.h:73
d
d
Definition: ffmpeg_filter.c:156
FSPPContext::prev_q
int prev_q
Definition: vf_fspp.h:65
int32_t
int32_t
Definition: audioconvert.c:56
AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL
#define AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL
Same as AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, except that the filter will have its filter_frame() c...
Definition: avfilter.h:134
imgutils.h
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:561
AVFrame::linesize
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
Definition: frame.h:334
AV_PIX_FMT_YUV410P
@ AV_PIX_FMT_YUV410P
planar YUV 4:1:0, 9bpp, (1 Cr & Cb sample per 4x4 Y samples)
Definition: pixfmt.h:72
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
FSPPContext::qscale_type
int qscale_type
Definition: vf_fspp.h:64
h
h
Definition: vp9dsp_template.c:2038
AVFILTER_DEFINE_CLASS
AVFILTER_DEFINE_CLASS(fspp)
int_simd16_t
int32_t int_simd16_t
Definition: vf_fspp.h:42
THRESHOLD
#define THRESHOLD(r, x, t)
Definition: vf_fspp.h:37
dither
static const uint8_t dither[8][8]
Definition: vf_fspp.c:58