FFmpeg
vf_fspp.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
3  * Copyright (C) 2005 Nikolaj Poroshin <porosh3@psu.ru>
4  * Copyright (c) 2014 Arwa Arif <arwaarif1994@gmail.com>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License along
19  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
20  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21  */
22 
23 /**
24  * @file
25  * Fast Simple Post-processing filter
26  * This implementation is based on an algorithm described in
27  * "Aria Nosratinia Embedded Post-Processing for
28  * Enhancement of Compressed Images (1999)"
29  * (http://www.utdallas.edu/~aria/papers/vlsisp99.pdf)
30  * Further, with splitting (I)DCT into horizontal/vertical passes, one of
31  * them can be performed once per block, not per pixel. This allows for much
32  * higher speed.
33  *
34  * Originally written by Michael Niedermayer and Nikolaj for the MPlayer
35  * project, and ported by Arwa Arif for FFmpeg.
36  */
37 
38 #include "libavutil/emms.h"
39 #include "libavutil/imgutils.h"
40 #include "libavutil/mem_internal.h"
41 #include "libavutil/opt.h"
42 #include "libavutil/pixdesc.h"
43 #include "internal.h"
44 #include "qp_table.h"
45 #include "vf_fspp.h"
46 #include "video.h"
47 
48 #define OFFSET(x) offsetof(FSPPContext, x)
49 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
50 static const AVOption fspp_options[] = {
51  { "quality", "set quality", OFFSET(log2_count), AV_OPT_TYPE_INT, {.i64 = 4}, 4, MAX_LEVEL, FLAGS },
52  { "qp", "force a constant quantizer parameter", OFFSET(qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 64, FLAGS },
53  { "strength", "set filter strength", OFFSET(strength), AV_OPT_TYPE_INT, {.i64 = 0}, -15, 32, FLAGS },
54  { "use_bframe_qp", "use B-frames' QP", OFFSET(use_bframe_qp), AV_OPT_TYPE_BOOL,{.i64 = 0}, 0, 1, FLAGS },
55  { NULL }
56 };
57 
59 
60 DECLARE_ALIGNED(32, static const uint8_t, dither)[8][8] = {
61  { 0, 48, 12, 60, 3, 51, 15, 63, },
62  { 32, 16, 44, 28, 35, 19, 47, 31, },
63  { 8, 56, 4, 52, 11, 59, 7, 55, },
64  { 40, 24, 36, 20, 43, 27, 39, 23, },
65  { 2, 50, 14, 62, 1, 49, 13, 61, },
66  { 34, 18, 46, 30, 33, 17, 45, 29, },
67  { 10, 58, 6, 54, 9, 57, 5, 53, },
68  { 42, 26, 38, 22, 41, 25, 37, 21, },
69 };
70 
/*
 * Base threshold matrix (8x8, row-major). filter_frame() scales it by the
 * strength-derived bias before packing it into threshold_mtx_noq.
 *
 * The values (e.g. 296) can't be too high: that causes too big a quant
 * dependence, or maybe overflow (check), which results in some flashing.
 */
static const short custom_threshold[64] = {
     71, 296, 295, 237,  71,  40,  38,  19,
    245, 193, 185, 121, 102,  73,  53,  27,
    158, 129, 141, 107,  97,  73,  50,  26,
    102, 116, 109,  98,  82,  66,  45,  23,
     71,  94,  95,  81,  70,  56,  38,  20,
     56,  77,  74,  66,  56,  44,  30,  15,
     38,  53,  50,  45,  38,  30,  21,  11,
     20,  27,  26,  23,  20,  15,  11,   5
};
84 
85 //This func reads from 1 slice, 1 and clears 0 & 1
86 static void store_slice_c(uint8_t *dst, int16_t *src,
87  ptrdiff_t dst_stride, ptrdiff_t src_stride,
88  ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
89 {
90  int y, x;
91 #define STORE(pos) \
92  temp = (src[x + pos] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
93  src[x + pos] = src[x + pos - 8 * src_stride] = 0; \
94  if (temp & 0x100) temp = ~(temp >> 31); \
95  dst[x + pos] = temp;
96 
97  for (y = 0; y < height; y++) {
98  const uint8_t *d = dither[y];
99  for (x = 0; x < width; x += 8) {
100  int temp;
101  STORE(0);
102  STORE(1);
103  STORE(2);
104  STORE(3);
105  STORE(4);
106  STORE(5);
107  STORE(6);
108  STORE(7);
109  }
110  src += src_stride;
111  dst += dst_stride;
112  }
113 }
114 
115 //This func reads from 2 slices, 0 & 2 and clears 2-nd
116 static void store_slice2_c(uint8_t *dst, int16_t *src,
117  ptrdiff_t dst_stride, ptrdiff_t src_stride,
118  ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
119 {
120  int y, x;
121 #define STORE2(pos) \
122  temp = (src[x + pos] + src[x + pos + 16 * src_stride] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
123  src[x + pos + 16 * src_stride] = 0; \
124  if (temp & 0x100) temp = ~(temp >> 31); \
125  dst[x + pos] = temp;
126 
127  for (y = 0; y < height; y++) {
128  const uint8_t *d = dither[y];
129  for (x = 0; x < width; x += 8) {
130  int temp;
131  STORE2(0);
132  STORE2(1);
133  STORE2(2);
134  STORE2(3);
135  STORE2(4);
136  STORE2(5);
137  STORE2(6);
138  STORE2(7);
139  }
140  src += src_stride;
141  dst += dst_stride;
142  }
143 }
144 
/**
 * Scale the 64-entry threshold matrix by a quantizer.
 *
 * @param thr_adr_noq  unscaled thresholds (64 values, read only)
 * @param thr_adr      receives q * thr_adr_noq[i] for each of the 64 entries,
 *                     truncated to int16_t
 * @param q            quantizer to multiply by
 */
static void mul_thrmat_c(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
{
    int a;
    for (a = 0; a < 64; a++)
        thr_adr[a] = q * thr_adr_noq[a];
}
151 
152 static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src,
153  int dst_stride, int src_stride,
154  int width, int height,
155  uint8_t *qp_store, int qp_stride, int is_luma)
156 {
157  int x, x0, y, es, qy, t;
158 
159  const int stride = is_luma ? p->temp_stride : (width + 16);
160  const int step = 6 - p->log2_count;
161  const int qpsh = 4 - p->hsub * !is_luma;
162  const int qpsv = 4 - p->vsub * !is_luma;
163 
164  DECLARE_ALIGNED(32, int32_t, block_align)[4 * 8 * BLOCKSZ + 4 * 8 * BLOCKSZ];
165  int16_t *block = (int16_t *)block_align;
166  int16_t *block3 = (int16_t *)(block_align + 4 * 8 * BLOCKSZ);
167 
168  memset(block3, 0, 4 * 8 * BLOCKSZ);
169 
170  if (!src || !dst) return;
171 
172  for (y = 0; y < height; y++) {
173  int index = 8 + 8 * stride + y * stride;
174  memcpy(p->src + index, src + y * src_stride, width);
175  for (x = 0; x < 8; x++) {
176  p->src[index - x - 1] = p->src[index + x ];
177  p->src[index + width + x ] = p->src[index + width - x - 1];
178  }
179  }
180 
181  for (y = 0; y < 8; y++) {
182  memcpy(p->src + ( 7 - y ) * stride, p->src + ( y + 8 ) * stride, stride);
183  memcpy(p->src + (height + 8 + y) * stride, p->src + (height - y + 7) * stride, stride);
184  }
185  //FIXME (try edge emu)
186 
187  for (y = 8; y < 24; y++)
188  memset(p->temp + 8 + y * stride, 0, width * sizeof(int16_t));
189 
190  for (y = step; y < height + 8; y += step) { //step= 1,2
191  const int y1 = y - 8 + step; //l5-7 l4-6;
192  qy = y - 4;
193 
194  if (qy > height - 1) qy = height - 1;
195  if (qy < 0) qy = 0;
196 
197  qy = (qy >> qpsv) * qp_stride;
198  p->row_fdct(block, p->src + y * stride + 2 - (y&1), stride, 2);
199 
200  for (x0 = 0; x0 < width + 8 - 8 * (BLOCKSZ - 1); x0 += 8 * (BLOCKSZ - 1)) {
201  p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y&1), stride, 2 * (BLOCKSZ - 1));
202 
203  if (p->qp)
204  p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + 0 * 8, block3 + 0 * 8, 8 * (BLOCKSZ - 1)); //yes, this is a HOTSPOT
205  else
206  for (x = 0; x < 8 * (BLOCKSZ - 1); x += 8) {
207  t = x + x0 - 2; //correct t=x+x0-2-(y&1), but its the same
208 
209  if (t < 0) t = 0; //t always < width-2
210 
211  t = qp_store[qy + (t >> qpsh)];
212  t = ff_norm_qscale(t, p->qscale_type);
213 
214  if (t != p->prev_q) p->prev_q = t, p->mul_thrmat((int16_t *)(&p->threshold_mtx_noq[0]), (int16_t *)(&p->threshold_mtx[0]), t);
215  p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + x * 8, block3 + x * 8, 8); //yes, this is a HOTSPOT
216  }
217  p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, 2 * (BLOCKSZ - 1));
218  memmove(block, block + (BLOCKSZ - 1) * 64, 8 * 8 * sizeof(int16_t)); //cycling
219  memmove(block3, block3 + (BLOCKSZ - 1) * 64, 6 * 8 * sizeof(int16_t));
220  }
221 
222  es = width + 8 - x0; // 8, ...
223  if (es > 8)
224  p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y & 1), stride, (es - 4) >> 2);
225 
226  p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block, block3, es&(~1));
227  if (es > 3)
228  p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, es >> 2);
229 
230  if (!(y1 & 7) && y1) {
231  if (y1 & 8)
232  p->store_slice(dst + (y1 - 8) * dst_stride, p->temp + 8 + 8 * stride,
233  dst_stride, stride, width, 8, 5 - p->log2_count);
234  else
235  p->store_slice2(dst + (y1 - 8) * dst_stride, p->temp + 8 + 0 * stride,
236  dst_stride, stride, width, 8, 5 - p->log2_count);
237  }
238  }
239 
240  if (y & 7) { // height % 8 != 0
241  if (y & 8)
242  p->store_slice(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 8 * stride,
243  dst_stride, stride, width, y&7, 5 - p->log2_count);
244  else
245  p->store_slice2(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 0 * stride,
246  dst_stride, stride, width, y&7, 5 - p->log2_count);
247  }
248 }
249 
250 static void column_fidct_c(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt)
251 {
252  int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
253  int_simd16_t tmp10, tmp11, tmp12, tmp13;
254  int_simd16_t z1,z2,z3,z4,z5, z10, z11, z12, z13;
255  int_simd16_t d0, d1, d2, d3, d4, d5, d6, d7;
256 
257  int16_t *dataptr;
258  int16_t *wsptr;
259  int16_t *threshold;
260  int ctr;
261 
262  dataptr = data;
263  wsptr = output;
264 
265  for (; cnt > 0; cnt -= 2) { //start positions
266  threshold = (int16_t *)thr_adr;//threshold_mtx
267  for (ctr = DCTSIZE; ctr > 0; ctr--) {
268  // Process columns from input, add to output.
269  tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7];
270  tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7];
271 
272  tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6];
273  tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6];
274 
275  tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5];
276  tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5];
277 
278  tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4];
279  tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4];
280 
281  // Even part of FDCT
282 
283  tmp10 = tmp0 + tmp3;
284  tmp13 = tmp0 - tmp3;
285  tmp11 = tmp1 + tmp2;
286  tmp12 = tmp1 - tmp2;
287 
288  d0 = tmp10 + tmp11;
289  d4 = tmp10 - tmp11;
290 
291  z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
292  d2 = tmp13 + z1;
293  d6 = tmp13 - z1;
294 
295  // Even part of IDCT
296 
297  THRESHOLD(tmp0, d0, threshold[0 * 8]);
298  THRESHOLD(tmp1, d2, threshold[2 * 8]);
299  THRESHOLD(tmp2, d4, threshold[4 * 8]);
300  THRESHOLD(tmp3, d6, threshold[6 * 8]);
301  tmp0 += 2;
302  tmp10 = (tmp0 + tmp2) >> 2;
303  tmp11 = (tmp0 - tmp2) >> 2;
304 
305  tmp13 = (tmp1 + tmp3) >>2; //+2 ! (psnr decides)
306  tmp12 = MULTIPLY16H((tmp1 - tmp3), FIX_1_414213562_A) - tmp13; //<<2
307 
308  tmp0 = tmp10 + tmp13; //->temps
309  tmp3 = tmp10 - tmp13; //->temps
310  tmp1 = tmp11 + tmp12; //->temps
311  tmp2 = tmp11 - tmp12; //->temps
312 
313  // Odd part of FDCT
314 
315  tmp10 = tmp4 + tmp5;
316  tmp11 = tmp5 + tmp6;
317  tmp12 = tmp6 + tmp7;
318 
319  z5 = MULTIPLY16H((tmp10 - tmp12) << 2, FIX_0_382683433);
320  z2 = MULTIPLY16H(tmp10 << 2, FIX_0_541196100) + z5;
321  z4 = MULTIPLY16H(tmp12 << 2, FIX_1_306562965) + z5;
322  z3 = MULTIPLY16H(tmp11 << 2, FIX_0_707106781);
323 
324  z11 = tmp7 + z3;
325  z13 = tmp7 - z3;
326 
327  d5 = z13 + z2;
328  d3 = z13 - z2;
329  d1 = z11 + z4;
330  d7 = z11 - z4;
331 
332  // Odd part of IDCT
333 
334  THRESHOLD(tmp4, d1, threshold[1 * 8]);
335  THRESHOLD(tmp5, d3, threshold[3 * 8]);
336  THRESHOLD(tmp6, d5, threshold[5 * 8]);
337  THRESHOLD(tmp7, d7, threshold[7 * 8]);
338 
339  //Simd version uses here a shortcut for the tmp5,tmp6,tmp7 == 0
340  z13 = tmp6 + tmp5;
341  z10 = (tmp6 - tmp5) << 1;
342  z11 = tmp4 + tmp7;
343  z12 = (tmp4 - tmp7) << 1;
344 
345  tmp7 = (z11 + z13) >> 2; //+2 !
346  tmp11 = MULTIPLY16H((z11 - z13) << 1, FIX_1_414213562);
347  z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
348  tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
349  tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - !!
350 
351  tmp6 = tmp12 - tmp7;
352  tmp5 = tmp11 - tmp6;
353  tmp4 = tmp10 + tmp5;
354 
355  wsptr[DCTSIZE * 0] += (tmp0 + tmp7);
356  wsptr[DCTSIZE * 1] += (tmp1 + tmp6);
357  wsptr[DCTSIZE * 2] += (tmp2 + tmp5);
358  wsptr[DCTSIZE * 3] += (tmp3 - tmp4);
359  wsptr[DCTSIZE * 4] += (tmp3 + tmp4);
360  wsptr[DCTSIZE * 5] += (tmp2 - tmp5);
361  wsptr[DCTSIZE * 6] = (tmp1 - tmp6);
362  wsptr[DCTSIZE * 7] = (tmp0 - tmp7);
363  //
364  dataptr++; //next column
365  wsptr++;
366  threshold++;
367  }
368  dataptr += 8; //skip each second start pos
369  wsptr += 8;
370  }
371 }
372 
373 static void row_idct_c(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
374 {
375  int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
376  int_simd16_t tmp10, tmp11, tmp12, tmp13;
377  int_simd16_t z5, z10, z11, z12, z13;
378  int16_t *outptr;
379  int16_t *wsptr;
380 
381  cnt *= 4;
382  wsptr = workspace;
383  outptr = output_adr;
384  for (; cnt > 0; cnt--) {
385  // Even part
386  //Simd version reads 4x4 block and transposes it
387  tmp10 = wsptr[2] + wsptr[3];
388  tmp11 = wsptr[2] - wsptr[3];
389 
390  tmp13 = wsptr[0] + wsptr[1];
391  tmp12 = (MULTIPLY16H(wsptr[0] - wsptr[1], FIX_1_414213562_A) << 2) - tmp13;//this shift order to avoid overflow
392 
393  tmp0 = tmp10 + tmp13; //->temps
394  tmp3 = tmp10 - tmp13; //->temps
395  tmp1 = tmp11 + tmp12;
396  tmp2 = tmp11 - tmp12;
397 
398  // Odd part
399  //Also transpose, with previous:
400  // ---- ---- ||||
401  // ---- ---- idct ||||
402  // ---- ---- ---> ||||
403  // ---- ---- ||||
404  z13 = wsptr[4] + wsptr[5];
405  z10 = wsptr[4] - wsptr[5];
406  z11 = wsptr[6] + wsptr[7];
407  z12 = wsptr[6] - wsptr[7];
408 
409  tmp7 = z11 + z13;
410  tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562);
411 
412  z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
413  tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
414  tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - FIX_
415 
416  tmp6 = (tmp12 << 3) - tmp7;
417  tmp5 = (tmp11 << 3) - tmp6;
418  tmp4 = (tmp10 << 3) + tmp5;
419 
420  // Final output stage: descale and write column
421  outptr[0 * output_stride] += DESCALE(tmp0 + tmp7, 3);
422  outptr[1 * output_stride] += DESCALE(tmp1 + tmp6, 3);
423  outptr[2 * output_stride] += DESCALE(tmp2 + tmp5, 3);
424  outptr[3 * output_stride] += DESCALE(tmp3 - tmp4, 3);
425  outptr[4 * output_stride] += DESCALE(tmp3 + tmp4, 3);
426  outptr[5 * output_stride] += DESCALE(tmp2 - tmp5, 3);
427  outptr[6 * output_stride] += DESCALE(tmp1 - tmp6, 3); //no += ?
428  outptr[7 * output_stride] += DESCALE(tmp0 - tmp7, 3); //no += ?
429  outptr++;
430 
431  wsptr += DCTSIZE; // advance pointer to next row
432  }
433 }
434 
435 static void row_fdct_c(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
436 {
437  int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
438  int_simd16_t tmp10, tmp11, tmp12, tmp13;
439  int_simd16_t z1, z2, z3, z4, z5, z11, z13;
440  int16_t *dataptr;
441 
442  cnt *= 4;
443  // Pass 1: process rows.
444 
445  dataptr = data;
446  for (; cnt > 0; cnt--) {
447  tmp0 = pixels[line_size * 0] + pixels[line_size * 7];
448  tmp7 = pixels[line_size * 0] - pixels[line_size * 7];
449  tmp1 = pixels[line_size * 1] + pixels[line_size * 6];
450  tmp6 = pixels[line_size * 1] - pixels[line_size * 6];
451  tmp2 = pixels[line_size * 2] + pixels[line_size * 5];
452  tmp5 = pixels[line_size * 2] - pixels[line_size * 5];
453  tmp3 = pixels[line_size * 3] + pixels[line_size * 4];
454  tmp4 = pixels[line_size * 3] - pixels[line_size * 4];
455 
456  // Even part
457 
458  tmp10 = tmp0 + tmp3;
459  tmp13 = tmp0 - tmp3;
460  tmp11 = tmp1 + tmp2;
461  tmp12 = tmp1 - tmp2;
462  //Even columns are written first, this leads to different order of columns
463  //in column_fidct(), but they are processed independently, so all ok.
464  //Later in the row_idct() columns readed at the same order.
465  dataptr[2] = tmp10 + tmp11;
466  dataptr[3] = tmp10 - tmp11;
467 
468  z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
469  dataptr[0] = tmp13 + z1;
470  dataptr[1] = tmp13 - z1;
471 
472  // Odd part
473 
474  tmp10 = (tmp4 + tmp5) << 2;
475  tmp11 = (tmp5 + tmp6) << 2;
476  tmp12 = (tmp6 + tmp7) << 2;
477 
478  z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433);
479  z2 = MULTIPLY16H(tmp10, FIX_0_541196100) + z5;
480  z4 = MULTIPLY16H(tmp12, FIX_1_306562965) + z5;
481  z3 = MULTIPLY16H(tmp11, FIX_0_707106781);
482 
483  z11 = tmp7 + z3;
484  z13 = tmp7 - z3;
485 
486  dataptr[4] = z13 + z2;
487  dataptr[5] = z13 - z2;
488  dataptr[6] = z11 + z4;
489  dataptr[7] = z11 - z4;
490 
491  pixels++; // advance pointer to next column
492  dataptr += DCTSIZE;
493  }
494 }
495 
496 static const enum AVPixelFormat pix_fmts[] = {
504 };
505 
507 {
508  AVFilterContext *ctx = inlink->dst;
509  FSPPContext *fspp = ctx->priv;
510  const int h = FFALIGN(inlink->h + 16, 16);
512 
513  fspp->hsub = desc->log2_chroma_w;
514  fspp->vsub = desc->log2_chroma_h;
515 
516  fspp->temp_stride = FFALIGN(inlink->w + 16, 16);
517  fspp->temp = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->temp));
518  fspp->src = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->src));
519 
520  if (!fspp->temp || !fspp->src)
521  return AVERROR(ENOMEM);
522 
523  fspp->store_slice = store_slice_c;
525  fspp->mul_thrmat = mul_thrmat_c;
527  fspp->row_idct = row_idct_c;
528  fspp->row_fdct = row_fdct_c;
529 
530 #if ARCH_X86
531  ff_fspp_init_x86(fspp);
532 #endif
533 
534  return 0;
535 }
536 
538 {
539  AVFilterContext *ctx = inlink->dst;
540  FSPPContext *fspp = ctx->priv;
541  AVFilterLink *outlink = ctx->outputs[0];
542  AVFrame *out = in;
543 
544  int qp_stride = 0;
545  int8_t *qp_table = NULL;
546  int i, bias;
547  int ret = 0;
548  int custom_threshold_m[64];
549 
550  bias = (1 << 4) + fspp->strength;
551 
552  for (i = 0; i < 64; i++) //FIXME: tune custom_threshold[] and remove this !
553  custom_threshold_m[i] = (int)(custom_threshold[i] * (bias / 71.0) + 0.5);
554 
555  for (i = 0; i < 8; i++) {
556  fspp->threshold_mtx_noq[2 * i] = (uint64_t)custom_threshold_m[i * 8 + 2]
557  |(((uint64_t)custom_threshold_m[i * 8 + 6]) << 16)
558  |(((uint64_t)custom_threshold_m[i * 8 + 0]) << 32)
559  |(((uint64_t)custom_threshold_m[i * 8 + 4]) << 48);
560 
561  fspp->threshold_mtx_noq[2 * i + 1] = (uint64_t)custom_threshold_m[i * 8 + 5]
562  |(((uint64_t)custom_threshold_m[i * 8 + 3]) << 16)
563  |(((uint64_t)custom_threshold_m[i * 8 + 1]) << 32)
564  |(((uint64_t)custom_threshold_m[i * 8 + 7]) << 48);
565  }
566 
567  if (fspp->qp)
568  fspp->prev_q = fspp->qp, fspp->mul_thrmat((int16_t *)(&fspp->threshold_mtx_noq[0]), (int16_t *)(&fspp->threshold_mtx[0]), fspp->qp);
569 
570  /* if we are not in a constant user quantizer mode and we don't want to use
571  * the quantizers from the B-frames (B-frames often have a higher QP), we
572  * need to save the qp table from the last non B-frame; this is what the
573  * following code block does */
574  if (!fspp->qp && (fspp->use_bframe_qp || in->pict_type != AV_PICTURE_TYPE_B)) {
575  ret = ff_qp_table_extract(in, &qp_table, &qp_stride, NULL, &fspp->qscale_type);
576  if (ret < 0) {
577  av_frame_free(&in);
578  return ret;
579  }
580 
581  if (!fspp->use_bframe_qp && in->pict_type != AV_PICTURE_TYPE_B) {
582  av_freep(&fspp->non_b_qp_table);
583  fspp->non_b_qp_table = qp_table;
584  fspp->non_b_qp_stride = qp_stride;
585  }
586  }
587 
588  if (fspp->log2_count && !ctx->is_disabled) {
589  if (!fspp->use_bframe_qp && fspp->non_b_qp_table) {
590  qp_table = fspp->non_b_qp_table;
591  qp_stride = fspp->non_b_qp_stride;
592  }
593 
594  if (qp_table || fspp->qp) {
595  const int cw = AV_CEIL_RSHIFT(inlink->w, fspp->hsub);
596  const int ch = AV_CEIL_RSHIFT(inlink->h, fspp->vsub);
597 
598  /* get a new frame if in-place is not possible or if the dimensions
599  * are not multiple of 8 */
600  if (!av_frame_is_writable(in) || (inlink->w & 7) || (inlink->h & 7)) {
601  const int aligned_w = FFALIGN(inlink->w, 8);
602  const int aligned_h = FFALIGN(inlink->h, 8);
603 
604  out = ff_get_video_buffer(outlink, aligned_w, aligned_h);
605  if (!out) {
606  av_frame_free(&in);
607  ret = AVERROR(ENOMEM);
608  goto finish;
609  }
611  out->width = in->width;
612  out->height = in->height;
613  }
614 
615  filter(fspp, out->data[0], in->data[0], out->linesize[0], in->linesize[0],
616  inlink->w, inlink->h, qp_table, qp_stride, 1);
617  filter(fspp, out->data[1], in->data[1], out->linesize[1], in->linesize[1],
618  cw, ch, qp_table, qp_stride, 0);
619  filter(fspp, out->data[2], in->data[2], out->linesize[2], in->linesize[2],
620  cw, ch, qp_table, qp_stride, 0);
621  emms_c();
622  }
623  }
624 
625  if (in != out) {
626  if (in->data[3])
627  av_image_copy_plane(out->data[3], out->linesize[3],
628  in ->data[3], in ->linesize[3],
629  inlink->w, inlink->h);
630  av_frame_free(&in);
631  }
632  ret = ff_filter_frame(outlink, out);
633 finish:
634  if (qp_table != fspp->non_b_qp_table)
635  av_freep(&qp_table);
636  return ret;
637 }
638 
640 {
641  FSPPContext *fspp = ctx->priv;
642  av_freep(&fspp->temp);
643  av_freep(&fspp->src);
644  av_freep(&fspp->non_b_qp_table);
645 }
646 
647 static const AVFilterPad fspp_inputs[] = {
648  {
649  .name = "default",
650  .type = AVMEDIA_TYPE_VIDEO,
651  .config_props = config_input,
652  .filter_frame = filter_frame,
653  },
654 };
655 
657  .name = "fspp",
658  .description = NULL_IF_CONFIG_SMALL("Apply Fast Simple Post-processing filter."),
659  .priv_size = sizeof(FSPPContext),
660  .uninit = uninit,
664  .priv_class = &fspp_class,
666 };
ff_get_video_buffer
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
Definition: video.c:112
MULTIPLY16H
#define MULTIPLY16H(x, k)
Definition: vf_fspp.h:37
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:71
FIX_0_707106781
#define FIX_0_707106781
Definition: jfdctfst.c:117
FIX_0_541196100
#define FIX_0_541196100
Definition: jfdctfst.c:116
FSPPContext::column_fidct
void(* column_fidct)(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt)
Definition: vf_fspp.h:83
store_slice2_c
static void store_slice2_c(uint8_t *dst, int16_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
Definition: vf_fspp.c:116
vf_fspp.h
qp_table.h
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
FSPPContext::hsub
int hsub
Definition: vf_fspp.h:61
STORE
#define STORE(pos)
mem_internal.h
out
FILE * out
Definition: movenc.c:54
FSPPContext::threshold_mtx_noq
uint64_t threshold_mtx_noq[8 *2]
Definition: vf_fspp.h:56
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1018
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2962
FILTER_PIXFMTS_ARRAY
#define FILTER_PIXFMTS_ARRAY(array)
Definition: internal.h:162
output
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
Definition: filter_design.txt:225
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
FLAGS
#define FLAGS
Definition: vf_fspp.c:49
FSPPContext::store_slice
void(* store_slice)(uint8_t *dst, int16_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
Definition: vf_fspp.h:73
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:88
FSPPContext::vsub
int vsub
Definition: vf_fspp.h:62
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:340
pixdesc.h
step
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about which is also called distortion Distortion can be quantified by almost any quality measurement one chooses the sum of squared differences is used but more complex methods that consider psychovisual effects can be used as well It makes no difference in this discussion First step
Definition: rate_distortion.txt:58
AVFrame::width
int width
Definition: frame.h:412
AVOption
AVOption.
Definition: opt.h:346
data
const char data[16]
Definition: mxf.c:148
AV_PIX_FMT_YUV440P
@ AV_PIX_FMT_YUV440P
planar YUV 4:4:0 (1 Cr & Cb sample per 1x2 Y samples)
Definition: pixfmt.h:106
FSPPContext::src
uint8_t * src
Definition: vf_fspp.h:67
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:170
ff_norm_qscale
static int ff_norm_qscale(int qscale, enum AVVideoEncParamsType type)
Normalize the qscale factor FIXME Add support for other values of enum AVVideoEncParamsType besides A...
Definition: qp_table.h:39
video.h
FIX_1_082392200
#define FIX_1_082392200
Definition: 4xm.c:159
FSPPContext::row_idct
void(* row_idct)(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
Definition: vf_fspp.h:86
AVFrame::data
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:361
av_image_copy_plane
void av_image_copy_plane(uint8_t *dst, int dst_linesize, const uint8_t *src, int src_linesize, int bytewidth, int height)
Copy image plane from src to dst.
Definition: imgutils.c:374
FIX_2_613125930
#define FIX_2_613125930
Definition: 4xm.c:162
row_fdct_c
static void row_fdct_c(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
Definition: vf_fspp.c:435
finish
static void finish(void)
Definition: movenc.c:342
BLOCKSZ
#define BLOCKSZ
Definition: vf_fspp.h:29
FIX_0_382683433
#define FIX_0_382683433
Definition: jfdctfst.c:115
fspp_inputs
static const AVFilterPad fspp_inputs[]
Definition: vf_fspp.c:647
custom_threshold
static const short custom_threshold[64]
Definition: vf_fspp.c:71
mul_thrmat_c
static void mul_thrmat_c(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
Definition: vf_fspp.c:145
AVFilterPad
A filter pad used for either input or output.
Definition: internal.h:33
FSPPContext::qscale_type
enum AVVideoEncParamsType qscale_type
Definition: vf_fspp.h:65
av_cold
#define av_cold
Definition: attributes.h:90
ff_vf_fspp
const AVFilter ff_vf_fspp
Definition: vf_fspp.c:656
ff_video_default_filterpad
const AVFilterPad ff_video_default_filterpad[1]
An AVFilterPad array whose only entry has name "default" and is of type AVMEDIA_TYPE_VIDEO.
Definition: video.c:37
column_fidct_c
static void column_fidct_c(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt)
Definition: vf_fspp.c:250
AV_PIX_FMT_YUVJ422P
@ AV_PIX_FMT_YUVJ422P
planar YUV 4:2:2, 16bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV422P and setting col...
Definition: pixfmt.h:86
emms_c
#define emms_c()
Definition: emms.h:63
width
#define width
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:58
FSPPContext::row_fdct
void(* row_fdct)(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
Definition: vf_fspp.h:89
DCTSIZE
#define DCTSIZE
Definition: jfdctfst.c:73
FSPPContext::non_b_qp_table
int8_t * non_b_qp_table
Definition: vf_fspp.h:69
FSPPContext::non_b_qp_stride
int non_b_qp_stride
Definition: vf_fspp.h:70
ctx
AVFormatContext * ctx
Definition: movenc.c:48
config_input
static int config_input(AVFilterLink *inlink)
Definition: vf_fspp.c:506
store_slice_c
static void store_slice_c(uint8_t *dst, int16_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
Definition: vf_fspp.c:86
AV_PIX_FMT_YUV420P
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:73
FSPPContext::log2_count
int log2_count
Definition: vf_fspp.h:59
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: internal.h:182
AV_PIX_FMT_YUVJ444P
@ AV_PIX_FMT_YUVJ444P
planar YUV 4:4:4, 24bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV444P and setting col...
Definition: pixfmt.h:87
NULL
#define NULL
Definition: coverity.c:32
av_frame_copy_props
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:637
bias
static int bias(int x, int c)
Definition: vqcdec.c:114
FSPPContext::qp
int qp
Definition: vf_fspp.h:64
AV_PIX_FMT_YUVJ420P
@ AV_PIX_FMT_YUVJ420P
planar YUV 4:2:0, 12bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV420P and setting col...
Definition: pixfmt.h:85
FIX_1_306562965
#define FIX_1_306562965
Definition: jfdctfst.c:118
STORE2
#define STORE2(pos)
AV_PIX_FMT_GRAY8
@ AV_PIX_FMT_GRAY8
Y , 8bpp.
Definition: pixfmt.h:81
index
int index
Definition: gxfenc.c:89
pix_fmts
static enum AVPixelFormat pix_fmts[]
Definition: vf_fspp.c:496
MAX_LEVEL
#define MAX_LEVEL
Definition: rl.h:36
AVFrame::pict_type
enum AVPictureType pict_type
Picture type of the frame.
Definition: frame.h:442
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:106
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem_internal.h:109
for
for(k=2;k<=8;++k)
Definition: h264pred_template.c:425
FSPPContext::strength
int strength
Definition: vf_fspp.h:60
uninit
static av_cold void uninit(AVFilterContext *ctx)
Definition: vf_fspp.c:639
av_frame_is_writable
int av_frame_is_writable(AVFrame *frame)
Check if the frame data is writable.
Definition: frame.c:573
OFFSET
#define OFFSET(x)
Definition: vf_fspp.c:48
height
#define height
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
fspp_options
static const AVOption fspp_options[]
Definition: vf_fspp.c:50
row_idct_c
static void row_idct_c(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
Definition: vf_fspp.c:373
FSPPContext::threshold_mtx
uint64_t threshold_mtx[8 *2]
Definition: vf_fspp.h:57
FIX_1_847759065
#define FIX_1_847759065
Definition: 4xm.c:161
internal.h
emms.h
FSPPContext::temp_stride
int temp_stride
Definition: vf_fspp.h:63
FSPPContext::use_bframe_qp
int use_bframe_qp
Definition: vf_fspp.h:71
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:255
av_malloc_array
#define av_malloc_array(a, b)
Definition: tableprint_vlc.h:31
AV_PIX_FMT_YUVJ440P
@ AV_PIX_FMT_YUVJ440P
planar YUV 4:4:0 full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV440P and setting color_range
Definition: pixfmt.h:107
FIX_1_414213562
#define FIX_1_414213562
Definition: 4xm.c:160
AVFilterPad::name
const char * name
Pad name.
Definition: internal.h:39
DESCALE
#define DESCALE(x, n)
Definition: jfdctfst.c:134
stride
#define stride
Definition: h264pred_template.c:537
AVFilter
Filter definition.
Definition: avfilter.h:166
ret
ret
Definition: filter_design.txt:187
ff_qp_table_extract
int ff_qp_table_extract(AVFrame *frame, int8_t **table, int *table_w, int *table_h, enum AVVideoEncParamsType *qscale_type)
Extract a libpostproc-compatible QP table - an 8-bit QP value per 16x16 macroblock,...
Definition: qp_table.c:27
AVFrame::height
int height
Definition: frame.h:412
FSPPContext
Definition: vf_fspp.h:54
ff_fspp_init_x86
void ff_fspp_init_x86(FSPPContext *fspp)
Definition: vf_fspp_init.c:37
FIX_1_414213562_A
static const int16_t FIX_1_414213562_A
Definition: vf_fspp.h:48
AV_PICTURE_TYPE_B
@ AV_PICTURE_TYPE_B
Bi-dir predicted.
Definition: avutil.h:281
AV_PIX_FMT_NONE
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:72
AV_OPT_TYPE_INT
@ AV_OPT_TYPE_INT
Definition: opt.h:235
filter
static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src, int dst_stride, int src_stride, int width, int height, uint8_t *qp_store, int qp_stride, int is_luma)
Definition: vf_fspp.c:152
FSPPContext::mul_thrmat
void(* mul_thrmat)(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
Definition: vf_fspp.h:81
temp
else temp
Definition: vf_mcdeint.c:263
AV_PIX_FMT_YUV444P
@ AV_PIX_FMT_YUV444P
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:78
AVFilterContext
An instance of a filter.
Definition: avfilter.h:407
FSPPContext::store_slice2
void(* store_slice2)(uint8_t *dst, int16_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
Definition: vf_fspp.h:77
AV_PIX_FMT_GBRP
@ AV_PIX_FMT_GBRP
planar GBR 4:4:4 24bpp
Definition: pixfmt.h:165
desc
const char * desc
Definition: libsvtav1.c:73
AVMEDIA_TYPE_VIDEO
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
AV_PIX_FMT_YUV422P
@ AV_PIX_FMT_YUV422P
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:77
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:69
FSPPContext::temp
int16_t * temp
Definition: vf_fspp.h:68
filter_frame
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
Definition: vf_fspp.c:537
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
AV_OPT_TYPE_BOOL
@ AV_OPT_TYPE_BOOL
Definition: opt.h:251
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: internal.h:183
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
AV_PIX_FMT_YUV411P
@ AV_PIX_FMT_YUV411P
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
Definition: pixfmt.h:80
d
d
Definition: ffmpeg_filter.c:425
FSPPContext::prev_q
int prev_q
Definition: vf_fspp.h:66
int32_t
int32_t
Definition: audioconvert.c:56
AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL
#define AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL
Same as AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, except that the filter will have its filter_frame() c...
Definition: avfilter.h:155
imgutils.h
AVFrame::linesize
int linesize[AV_NUM_DATA_POINTERS]
For video, a positive or negative value, which is typically indicating the size in bytes of each pict...
Definition: frame.h:385
AV_PIX_FMT_YUV410P
@ AV_PIX_FMT_YUV410P
planar YUV 4:1:0, 9bpp, (1 Cr & Cb sample per 4x4 Y samples)
Definition: pixfmt.h:79
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
h
h
Definition: vp9dsp_template.c:2038
AVFILTER_DEFINE_CLASS
AVFILTER_DEFINE_CLASS(fspp)
int_simd16_t
int32_t int_simd16_t
Definition: vf_fspp.h:43
THRESHOLD
#define THRESHOLD(r, x, t)
Definition: vf_fspp.h:38
dither
static const uint8_t dither[8][8]
Definition: vf_fspp.c:60