FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
postprocess.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3  *
4  * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 /**
24  * @file
25  * postprocessing.
26  */
27 
28 /*
29  C MMX MMX2 3DNow AltiVec
30 isVertDC Ec Ec Ec
31 isVertMinMaxOk Ec Ec Ec
32 doVertLowPass E e e Ec
33 doVertDefFilter Ec Ec e e Ec
34 isHorizDC Ec Ec Ec
35 isHorizMinMaxOk a E Ec
36 doHorizLowPass E e e Ec
37 doHorizDefFilter Ec Ec e e Ec
38 do_a_deblock Ec E Ec E
39 deRing E e e* Ecp
40 Vertical RKAlgo1 E a a
41 Horizontal RKAlgo1 a a
42 Vertical X1# a E E
43 Horizontal X1# a E E
44 LinIpolDeinterlace e E E*
45 CubicIpolDeinterlace a e e*
46 LinBlendDeinterlace e E E*
47 MedianDeinterlace# E Ec Ec
48 TempDeNoiser# E e e Ec
49 
50 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51 # more or less self-invented filters, so the exactness is not too meaningful
52 E = Exact implementation
53 e = almost exact implementation (slightly different rounding,...)
54 a = alternative / approximate impl
55 c = checked against the other implementations (-vo md5)
56 p = partially optimized, still some work to do
57 */
58 
59 /*
60 TODO:
61 reduce the time wasted on the mem transfer
62 unroll stuff if instructions depend too much on the prior one
63 move YScale thing to the end instead of fixing QP
64 write a faster and higher quality deblocking filter :)
65 make the mainloop more flexible (variable number of blocks at once)
66  (the if/else stuff per block is slowing things down)
67 compare the quality & speed of all filters
68 split this huge file
69 optimize c versions
70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71 ...
72 */
73 
74 //Changelog: use git log
75 
76 #include "config.h"
77 #include "libavutil/avutil.h"
78 #include "libavutil/avassert.h"
79 #include <inttypes.h>
80 #include <stdio.h>
81 #include <stdlib.h>
82 #include <string.h>
83 //#undef HAVE_MMXEXT_INLINE
84 //#define HAVE_AMD3DNOW_INLINE
85 //#undef HAVE_MMX_INLINE
86 //#undef ARCH_X86
87 //#define DEBUG_BRIGHTNESS
88 #include "postprocess.h"
89 #include "postprocess_internal.h"
90 #include "libavutil/avstring.h"
91 
92 #include "libavutil/ffversion.h"
93 const char postproc_ffversion[] = "FFmpeg version " FFMPEG_VERSION;
94 
95 unsigned postproc_version(void)
96 {
99 }
100 
101 const char *postproc_configuration(void)
102 {
103  return FFMPEG_CONFIGURATION;
104 }
105 
106 const char *postproc_license(void)
107 {
108 #define LICENSE_PREFIX "libpostproc license: "
109  return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
110 }
111 
112 #if HAVE_ALTIVEC_H
113 #include <altivec.h>
114 #endif
115 
116 #define GET_MODE_BUFFER_SIZE 500
117 #define OPTIONS_ARRAY_SIZE 10
118 #define BLOCK_SIZE 8
119 #define TEMP_STRIDE 8
120 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
121 
122 #if ARCH_X86 && HAVE_INLINE_ASM
123 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
124 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
125 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
126 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
127 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
128 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
129 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
130 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
131 #endif
132 
133 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
134 
135 
136 static const struct PPFilter filters[]=
137 {
138  {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
139  {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
140 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
141  {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
142  {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
143  {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
144  {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
145  {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
146  {"dr", "dering", 1, 5, 6, DERING},
147  {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
148  {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
149  {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
150  {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
151  {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
152  {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
153  {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
154  {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
155  {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
156  {"be", "bitexact", 1, 0, 0, BITEXACT},
157  {"vi", "visualize", 1, 0, 0, VISUALIZE},
158  {NULL, NULL,0,0,0,0} //End Marker
159 };
160 
/*
 * Alias table used by the mode-string parser: entries come in pairs
 * (alias name, filter string it expands to). A single NULL terminates
 * the list.
 */
static const char * const replaceTable[] =
{
    "default", "hb:a,vb:a,dr:a",
    "de",      "hb:a,vb:a,dr:a",
    "fast",    "h1:a,v1:a,dr:a",
    "fa",      "h1:a,v1:a,dr:a",
    "ac",      "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};
170 
171 
#if ARCH_X86 && HAVE_INLINE_ASM
/* Thin wrappers that issue one x86 prefetch instruction for address p. */

/** Issue a "prefetchnta" for the address p. */
static inline void prefetchnta(const void *p)
{
    __asm__ volatile( "prefetchnta (%0)\n\t" : : "r" (p) );
}

/** Issue a "prefetcht0" for the address p. */
static inline void prefetcht0(const void *p)
{
    __asm__ volatile( "prefetcht0 (%0)\n\t" : : "r" (p) );
}

/** Issue a "prefetcht1" for the address p. */
static inline void prefetcht1(const void *p)
{
    __asm__ volatile( "prefetcht1 (%0)\n\t" : : "r" (p) );
}

/** Issue a "prefetcht2" for the address p. */
static inline void prefetcht2(const void *p)
{
    __asm__ volatile( "prefetcht2 (%0)\n\t" : : "r" (p) );
}
#endif
201 
202 /* The horizontal functions exist only in C because the MMX
203  * code is faster with vertical filters and transposing. */
204 
205 /**
206  * Check if the given 8x8 Block is mostly "flat"
207  */
208 static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c)
209 {
210  int numEq= 0;
211  int y;
212  const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
213  const int dcThreshold= dcOffset*2 + 1;
214 
215  for(y=0; y<BLOCK_SIZE; y++){
216  numEq += ((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold;
217  numEq += ((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold;
218  numEq += ((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold;
219  numEq += ((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold;
220  numEq += ((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold;
221  numEq += ((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold;
222  numEq += ((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold;
223  src+= stride;
224  }
225  return numEq > c->ppMode.flatnessThreshold;
226 }
227 
228 /**
229  * Check if the middle 8x8 Block in the given 8x16 block is flat
230  */
231 static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c)
232 {
233  int numEq= 0;
234  int y;
235  const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
236  const int dcThreshold= dcOffset*2 + 1;
237 
238  src+= stride*4; // src points to begin of the 8x8 Block
239  for(y=0; y<BLOCK_SIZE-1; y++){
240  numEq += ((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold;
241  numEq += ((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold;
242  numEq += ((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold;
243  numEq += ((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold;
244  numEq += ((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold;
245  numEq += ((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold;
246  numEq += ((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold;
247  numEq += ((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold;
248  src+= stride;
249  }
250  return numEq > c->ppMode.flatnessThreshold;
251 }
252 
/**
 * Cheap check that the horizontal variation inside an 8x8 block is small.
 *
 * One pixel pair is sampled per row; the sampled columns repeat every four
 * rows: (0,5), (2,7), (4,1), (6,3).
 *
 * @param src    top-left pixel of the block
 * @param stride distance between two rows
 * @param QP     quantizer; a pair passes when its difference is within +-2*QP
 * @return 1 if all 8 sampled pairs pass, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
{
    static const uint8_t pair[4][2] = { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    const int bias  = 2 * QP;
    const int limit = 4 * QP;
    int row;

    for (row = 0; row < 8; row++, src += stride) {
        const uint8_t *idx = pair[row & 3];

        /* unsigned compare: rejects both diff < -2*QP and diff > 2*QP */
        if ((unsigned)(src[idx[0]] - src[idx[1]] + bias) > limit)
            return 0;
    }
    return 1;
}
268 
269 static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP)
270 {
271  int x;
272  src+= stride*4;
273  for(x=0; x<BLOCK_SIZE; x+=4){
274  if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
275  if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
276  if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
277  if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
278  }
279  return 1;
280 }
281 
282 static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c)
283 {
284  if( isHorizDC_C(src, stride, c) ){
285  return isHorizMinMaxOk_C(src, stride, c->QP);
286  }else{
287  return 2;
288  }
289 }
290 
291 static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c)
292 {
293  if( isVertDC_C(src, stride, c) ){
294  return isVertMinMaxOk_C(src, stride, c->QP);
295  }else{
296  return 2;
297  }
298 }
299 
300 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c)
301 {
302  int y;
303  for(y=0; y<BLOCK_SIZE; y++){
304  const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
305 
306  if(FFABS(middleEnergy) < 8*c->QP){
307  const int q=(dst[3] - dst[4])/2;
308  const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
309  const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
310 
311  int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
312  d= FFMAX(d, 0);
313 
314  d= (5*d + 32) >> 6;
315  d*= FFSIGN(-middleEnergy);
316 
317  if(q>0)
318  {
319  d = FFMAX(d, 0);
320  d = FFMIN(d, q);
321  }
322  else
323  {
324  d = FFMIN(d, 0);
325  d = FFMAX(d, q);
326  }
327 
328  dst[3]-= d;
329  dst[4]+= d;
330  }
331  dst+= stride;
332  }
333 }
334 
335 /**
336  * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
337  * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
338  */
339 static inline void doHorizLowPass_C(uint8_t dst[], int stride, const PPContext *c)
340 {
341  int y;
342  for(y=0; y<BLOCK_SIZE; y++){
343  const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
344  const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
345 
346  int sums[10];
347  sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
348  sums[1] = sums[0] - first + dst[3];
349  sums[2] = sums[1] - first + dst[4];
350  sums[3] = sums[2] - first + dst[5];
351  sums[4] = sums[3] - first + dst[6];
352  sums[5] = sums[4] - dst[0] + dst[7];
353  sums[6] = sums[5] - dst[1] + last;
354  sums[7] = sums[6] - dst[2] + last;
355  sums[8] = sums[7] - dst[3] + last;
356  sums[9] = sums[8] - dst[4] + last;
357 
358  dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
359  dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
360  dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
361  dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
362  dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
363  dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
364  dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
365  dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
366 
367  dst+= stride;
368  }
369 }
370 
371 /**
372  * Experimental Filter 1 (Horizontal)
373  * will not damage linear gradients
374  * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
375  * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
376  * MMX2 version does correct clipping C version does not
377  * not identical with the vertical one
378  */
379 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
380 {
381  int y;
382  static uint64_t lut[256];
383  if(!lut[255])
384  {
385  int i;
386  for(i=0; i<256; i++)
387  {
388  int v= i < 128 ? 2*i : 2*(i-256);
389 /*
390 //Simulate 112242211 9-Tap filter
391  uint64_t a= (v/16) & 0xFF;
392  uint64_t b= (v/8) & 0xFF;
393  uint64_t c= (v/4) & 0xFF;
394  uint64_t d= (3*v/8) & 0xFF;
395 */
396 //Simulate piecewise linear interpolation
397  uint64_t a= (v/16) & 0xFF;
398  uint64_t b= (v*3/16) & 0xFF;
399  uint64_t c= (v*5/16) & 0xFF;
400  uint64_t d= (7*v/16) & 0xFF;
401  uint64_t A= (0x100 - a)&0xFF;
402  uint64_t B= (0x100 - b)&0xFF;
403  uint64_t C= (0x100 - c)&0xFF;
404  uint64_t D= (0x100 - c)&0xFF;
405 
406  lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
407  (D<<24) | (C<<16) | (B<<8) | (A);
408  //lut[i] = (v<<32) | (v<<24);
409  }
410  }
411 
412  for(y=0; y<BLOCK_SIZE; y++){
413  int a= src[1] - src[2];
414  int b= src[3] - src[4];
415  int c= src[5] - src[6];
416 
417  int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
418 
419  if(d < QP){
420  int v = d * FFSIGN(-b);
421 
422  src[1] +=v/8;
423  src[2] +=v/4;
424  src[3] +=3*v/8;
425  src[4] -=3*v/8;
426  src[5] -=v/4;
427  src[6] -=v/8;
428  }
429  src+=stride;
430  }
431 }
432 
433 /**
434  * accurate deblock filter
435  */
437  int stride, const PPContext *c, int mode)
438 {
439  int y;
440  const int QP= c->QP;
441  const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
442  const int dcThreshold= dcOffset*2 + 1;
443 //START_TIMER
444  src+= step*4; // src points to begin of the 8x8 Block
445  for(y=0; y<8; y++){
446  int numEq= 0;
447 
448  numEq += ((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold;
449  numEq += ((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold;
450  numEq += ((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold;
451  numEq += ((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold;
452  numEq += ((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold;
453  numEq += ((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold;
454  numEq += ((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold;
455  numEq += ((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold;
456  numEq += ((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold;
457  if(numEq > c->ppMode.flatnessThreshold){
458  int min, max, x;
459 
460  if(src[0] > src[step]){
461  max= src[0];
462  min= src[step];
463  }else{
464  max= src[step];
465  min= src[0];
466  }
467  for(x=2; x<8; x+=2){
468  if(src[x*step] > src[(x+1)*step]){
469  if(src[x *step] > max) max= src[ x *step];
470  if(src[(x+1)*step] < min) min= src[(x+1)*step];
471  }else{
472  if(src[(x+1)*step] > max) max= src[(x+1)*step];
473  if(src[ x *step] < min) min= src[ x *step];
474  }
475  }
476  if(max-min < 2*QP){
477  const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
478  const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
479 
480  int sums[10];
481  sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
482  sums[1] = sums[0] - first + src[3*step];
483  sums[2] = sums[1] - first + src[4*step];
484  sums[3] = sums[2] - first + src[5*step];
485  sums[4] = sums[3] - first + src[6*step];
486  sums[5] = sums[4] - src[0*step] + src[7*step];
487  sums[6] = sums[5] - src[1*step] + last;
488  sums[7] = sums[6] - src[2*step] + last;
489  sums[8] = sums[7] - src[3*step] + last;
490  sums[9] = sums[8] - src[4*step] + last;
491 
492  if (mode & VISUALIZE) {
493  src[0*step] =
494  src[1*step] =
495  src[2*step] =
496  src[3*step] =
497  src[4*step] =
498  src[5*step] =
499  src[6*step] =
500  src[7*step] = 128;
501  }
502  src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
503  src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
504  src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
505  src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
506  src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
507  src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
508  src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
509  src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
510  }
511  }else{
512  const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
513 
514  if(FFABS(middleEnergy) < 8*QP){
515  const int q=(src[3*step] - src[4*step])/2;
516  const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
517  const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
518 
519  int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
520  d= FFMAX(d, 0);
521 
522  d= (5*d + 32) >> 6;
523  d*= FFSIGN(-middleEnergy);
524 
525  if(q>0){
526  d = FFMAX(d, 0);
527  d = FFMIN(d, q);
528  }else{
529  d = FFMIN(d, 0);
530  d = FFMAX(d, q);
531  }
532 
533  if ((mode & VISUALIZE) && d) {
534  d= (d < 0) ? 32 : -32;
535  src[3*step]= av_clip_uint8(src[3*step] - d);
536  src[4*step]= av_clip_uint8(src[4*step] + d);
537  d = 0;
538  }
539 
540  src[3*step]-= d;
541  src[4*step]+= d;
542  }
543  }
544 
545  src += stride;
546  }
547 /*if(step==16){
548  STOP_TIMER("step16")
549 }else{
550  STOP_TIMER("stepX")
551 }*/
552 }
553 
554 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
555 //Plain C versions
556 //we always compile C for testing which needs bitexactness
557 #define TEMPLATE_PP_C 1
558 #include "postprocess_template.c"
559 
560 #if HAVE_ALTIVEC
561 # define TEMPLATE_PP_ALTIVEC 1
563 # include "postprocess_template.c"
564 #endif
565 
566 #if ARCH_X86 && HAVE_INLINE_ASM
567 # if CONFIG_RUNTIME_CPUDETECT
568 # define TEMPLATE_PP_MMX 1
569 # include "postprocess_template.c"
570 # define TEMPLATE_PP_MMXEXT 1
571 # include "postprocess_template.c"
572 # define TEMPLATE_PP_3DNOW 1
573 # include "postprocess_template.c"
574 # define TEMPLATE_PP_SSE2 1
575 # include "postprocess_template.c"
576 # else
577 # if HAVE_SSE2_INLINE
578 # define TEMPLATE_PP_SSE2 1
579 # include "postprocess_template.c"
580 # elif HAVE_MMXEXT_INLINE
581 # define TEMPLATE_PP_MMXEXT 1
582 # include "postprocess_template.c"
583 # elif HAVE_AMD3DNOW_INLINE
584 # define TEMPLATE_PP_3DNOW 1
585 # include "postprocess_template.c"
586 # elif HAVE_MMX_INLINE
587 # define TEMPLATE_PP_MMX 1
588 # include "postprocess_template.c"
589 # endif
590 # endif
591 #endif
592 
593 typedef void (*pp_fn)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
594  const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2);
595 
596 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
597  const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
598 {
599  pp_fn pp = postProcess_C;
600  PPContext *c= (PPContext *)vc;
601  PPMode *ppMode= (PPMode *)vm;
602  c->ppMode= *ppMode; //FIXME
603 
604  if (!(ppMode->lumMode & BITEXACT)) {
605 #if CONFIG_RUNTIME_CPUDETECT
606 #if ARCH_X86 && HAVE_INLINE_ASM
607  // ordered per speed fastest first
608  if (c->cpuCaps & AV_CPU_FLAG_SSE2) pp = postProcess_SSE2;
609  else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT) pp = postProcess_MMX2;
610  else if (c->cpuCaps & AV_CPU_FLAG_3DNOW) pp = postProcess_3DNow;
611  else if (c->cpuCaps & AV_CPU_FLAG_MMX) pp = postProcess_MMX;
612 #elif HAVE_ALTIVEC
613  if (c->cpuCaps & AV_CPU_FLAG_ALTIVEC) pp = postProcess_altivec;
614 #endif
615 #else /* CONFIG_RUNTIME_CPUDETECT */
616 #if HAVE_SSE2_INLINE
617  pp = postProcess_SSE2;
618 #elif HAVE_MMXEXT_INLINE
619  pp = postProcess_MMX2;
620 #elif HAVE_AMD3DNOW_INLINE
621  pp = postProcess_3DNow;
622 #elif HAVE_MMX_INLINE
623  pp = postProcess_MMX;
624 #elif HAVE_ALTIVEC
625  pp = postProcess_altivec;
626 #endif
627 #endif /* !CONFIG_RUNTIME_CPUDETECT */
628  }
629 
630  pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
631 }
632 
/* -pp Command line Help
 *
 * The filter names printed here must be spelled exactly like the entries of
 * the filters[] table, because the parser matches them with strcmp().
 */
const char pp_help[] =
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
678 
679 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
680 {
682  char *p= temp;
683  static const char filterDelimiters[] = ",/";
684  static const char optionDelimiters[] = ":|";
685  struct PPMode *ppMode;
686  char *filterToken;
687 
688  if (!name) {
689  av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n");
690  return NULL;
691  }
692 
693  if (!strcmp(name, "help")) {
694  const char *p;
695  for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
696  av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
697  av_log(NULL, AV_LOG_INFO, "%s", temp);
698  }
699  return NULL;
700  }
701 
702  ppMode= av_malloc(sizeof(PPMode));
703 
704  ppMode->lumMode= 0;
705  ppMode->chromMode= 0;
706  ppMode->maxTmpNoise[0]= 700;
707  ppMode->maxTmpNoise[1]= 1500;
708  ppMode->maxTmpNoise[2]= 3000;
709  ppMode->maxAllowedY= 234;
710  ppMode->minAllowedY= 16;
711  ppMode->baseDcDiff= 256/8;
712  ppMode->flatnessThreshold= 56-16-1;
713  ppMode->maxClippedThreshold= 0.01;
714  ppMode->error=0;
715 
716  memset(temp, 0, GET_MODE_BUFFER_SIZE);
717  av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
718 
719  av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
720 
721  for(;;){
722  const char *filterName;
723  int q= 1000000; //PP_QUALITY_MAX;
724  int chrom=-1;
725  int luma=-1;
726  const char *option;
727  const char *options[OPTIONS_ARRAY_SIZE];
728  int i;
729  int filterNameOk=0;
730  int numOfUnknownOptions=0;
731  int enable=1; //does the user want us to enabled or disabled the filter
732  char *tokstate;
733 
734  filterToken= av_strtok(p, filterDelimiters, &tokstate);
735  if(!filterToken) break;
736  p+= strlen(filterToken) + 1; // p points to next filterToken
737  filterName= av_strtok(filterToken, optionDelimiters, &tokstate);
738  if (!filterName) {
739  ppMode->error++;
740  break;
741  }
742  av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
743 
744  if(*filterName == '-'){
745  enable=0;
746  filterName++;
747  }
748 
749  for(;;){ //for all options
750  option= av_strtok(NULL, optionDelimiters, &tokstate);
751  if(!option) break;
752 
753  av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
754  if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
755  else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
756  else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
757  else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
758  else{
759  options[numOfUnknownOptions] = option;
760  numOfUnknownOptions++;
761  }
762  if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
763  }
764  options[numOfUnknownOptions] = NULL;
765 
766  /* replace stuff from the replace Table */
767  for(i=0; replaceTable[2*i]; i++){
768  if(!strcmp(replaceTable[2*i], filterName)){
769  int newlen= strlen(replaceTable[2*i + 1]);
770  int plen;
771  int spaceLeft;
772 
773  p--, *p=',';
774 
775  plen= strlen(p);
776  spaceLeft= p - temp + plen;
777  if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE - 1){
778  ppMode->error++;
779  break;
780  }
781  memmove(p + newlen, p, plen+1);
782  memcpy(p, replaceTable[2*i + 1], newlen);
783  filterNameOk=1;
784  }
785  }
786 
787  for(i=0; filters[i].shortName; i++){
788  if( !strcmp(filters[i].longName, filterName)
789  || !strcmp(filters[i].shortName, filterName)){
790  ppMode->lumMode &= ~filters[i].mask;
791  ppMode->chromMode &= ~filters[i].mask;
792 
793  filterNameOk=1;
794  if(!enable) break; // user wants to disable it
795 
796  if(q >= filters[i].minLumQuality && luma)
797  ppMode->lumMode|= filters[i].mask;
798  if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
799  if(q >= filters[i].minChromQuality)
800  ppMode->chromMode|= filters[i].mask;
801 
802  if(filters[i].mask == LEVEL_FIX){
803  int o;
804  ppMode->minAllowedY= 16;
805  ppMode->maxAllowedY= 234;
806  for(o=0; options[o]; o++){
807  if( !strcmp(options[o],"fullyrange")
808  ||!strcmp(options[o],"f")){
809  ppMode->minAllowedY= 0;
810  ppMode->maxAllowedY= 255;
811  numOfUnknownOptions--;
812  }
813  }
814  }
815  else if(filters[i].mask == TEMP_NOISE_FILTER)
816  {
817  int o;
818  int numOfNoises=0;
819 
820  for(o=0; options[o]; o++){
821  char *tail;
822  ppMode->maxTmpNoise[numOfNoises]=
823  strtol(options[o], &tail, 0);
824  if(tail!=options[o]){
825  numOfNoises++;
826  numOfUnknownOptions--;
827  if(numOfNoises >= 3) break;
828  }
829  }
830  }
831  else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
832  || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
833  int o;
834 
835  for(o=0; options[o] && o<2; o++){
836  char *tail;
837  int val= strtol(options[o], &tail, 0);
838  if(tail==options[o]) break;
839 
840  numOfUnknownOptions--;
841  if(o==0) ppMode->baseDcDiff= val;
842  else ppMode->flatnessThreshold= val;
843  }
844  }
845  else if(filters[i].mask == FORCE_QUANT){
846  int o;
847  ppMode->forcedQuant= 15;
848 
849  for(o=0; options[o] && o<1; o++){
850  char *tail;
851  int val= strtol(options[o], &tail, 0);
852  if(tail==options[o]) break;
853 
854  numOfUnknownOptions--;
855  ppMode->forcedQuant= val;
856  }
857  }
858  }
859  }
860  if(!filterNameOk) ppMode->error++;
861  ppMode->error += numOfUnknownOptions;
862  }
863 
864  av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
865  if(ppMode->error){
866  av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
867  av_free(ppMode);
868  return NULL;
869  }
870  return ppMode;
871 }
872 
874  av_free(mode);
875 }
876 
/**
 * Replace the buffer *p by a new zero-filled buffer of the given size.
 *
 * The old buffer is freed first; its contents are not carried over.
 * *p ends up holding whatever av_mallocz() returned.
 */
static void reallocAlign(void **p, int size){
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p    = fresh;
}
881 
882 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
883  int mbWidth = (width+15)>>4;
884  int mbHeight= (height+15)>>4;
885  int i;
886 
887  c->stride= stride;
888  c->qpStride= qpStride;
889 
890  reallocAlign((void **)&c->tempDst, stride*24+32);
891  reallocAlign((void **)&c->tempSrc, stride*24);
892  reallocAlign((void **)&c->tempBlocks, 2*16*8);
893  reallocAlign((void **)&c->yHistogram, 256*sizeof(uint64_t));
894  for(i=0; i<256; i++)
895  c->yHistogram[i]= width*height/64*15/256;
896 
897  for(i=0; i<3; i++){
898  //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
899  reallocAlign((void **)&c->tempBlurred[i], stride*mbHeight*16 + 17*1024);
900  reallocAlign((void **)&c->tempBlurredPast[i], 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
901  }
902 
903  reallocAlign((void **)&c->deintTemp, 2*width+32);
904  reallocAlign((void **)&c->nonBQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
905  reallocAlign((void **)&c->stdQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
906  reallocAlign((void **)&c->forcedQPTable, mbWidth*sizeof(QP_STORE_T));
907 }
908 
/**
 * Name callback referenced by av_codec_context_class.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char * context_to_name(void * ptr) {
    static const char *const label = "postproc";

    (void)ptr;
    return label;
}
912 
913 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
914 
915 pp_context *pp_get_context(int width, int height, int cpuCaps){
916  PPContext *c= av_malloc(sizeof(PPContext));
917  int stride= FFALIGN(width, 16); //assumed / will realloc if needed
918  int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
919 
920  memset(c, 0, sizeof(PPContext));
922  if(cpuCaps&PP_FORMAT){
923  c->hChromaSubSample= cpuCaps&0x3;
924  c->vChromaSubSample= (cpuCaps>>4)&0x3;
925  }else{
926  c->hChromaSubSample= 1;
927  c->vChromaSubSample= 1;
928  }
929  if (cpuCaps & PP_CPU_CAPS_AUTO) {
930  c->cpuCaps = av_get_cpu_flags();
931  } else {
932  c->cpuCaps = 0;
933  if (cpuCaps & PP_CPU_CAPS_MMX) c->cpuCaps |= AV_CPU_FLAG_MMX;
934  if (cpuCaps & PP_CPU_CAPS_MMX2) c->cpuCaps |= AV_CPU_FLAG_MMXEXT;
935  if (cpuCaps & PP_CPU_CAPS_3DNOW) c->cpuCaps |= AV_CPU_FLAG_3DNOW;
936  if (cpuCaps & PP_CPU_CAPS_ALTIVEC) c->cpuCaps |= AV_CPU_FLAG_ALTIVEC;
937  }
938 
939  reallocBuffers(c, width, height, stride, qpStride);
940 
941  c->frameNum=-1;
942 
943  return c;
944 }
945 
946 void pp_free_context(void *vc){
947  PPContext *c = (PPContext*)vc;
948  int i;
949 
950  for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurred); i++)
951  av_free(c->tempBlurred[i]);
952  for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurredPast); i++)
953  av_free(c->tempBlurredPast[i]);
954 
955  av_free(c->tempBlocks);
956  av_free(c->yHistogram);
957  av_free(c->tempDst);
958  av_free(c->tempSrc);
959  av_free(c->deintTemp);
960  av_free(c->stdQPTable);
961  av_free(c->nonBQPTable);
963 
964  memset(c, 0, sizeof(PPContext));
965 
966  av_free(c);
967 }
968 
969 void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
970  uint8_t * dst[3], const int dstStride[3],
971  int width, int height,
972  const QP_STORE_T *QP_store, int QPStride,
973  pp_mode *vm, void *vc, int pict_type)
974 {
975  int mbWidth = (width+15)>>4;
976  int mbHeight= (height+15)>>4;
977  PPMode *mode = vm;
978  PPContext *c = vc;
979  int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
980  int absQPStride = FFABS(QPStride);
981 
982  // c->stride and c->QPStride are always positive
983  if(c->stride < minStride || c->qpStride < absQPStride)
984  reallocBuffers(c, width, height,
985  FFMAX(minStride, c->stride),
986  FFMAX(c->qpStride, absQPStride));
987 
988  if(!QP_store || (mode->lumMode & FORCE_QUANT)){
989  int i;
990  QP_store= c->forcedQPTable;
991  absQPStride = QPStride = 0;
992  if(mode->lumMode & FORCE_QUANT)
993  for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
994  else
995  for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
996  }
997 
998  if(pict_type & PP_PICT_TYPE_QP2){
999  int i;
1000  const int count= FFMAX(mbHeight * absQPStride, mbWidth);
1001  for(i=0; i<(count>>2); i++){
1002  ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1003  }
1004  for(i<<=2; i<count; i++){
1005  c->stdQPTable[i] = QP_store[i]>>1;
1006  }
1007  QP_store= c->stdQPTable;
1008  QPStride= absQPStride;
1009  }
1010 
1011  if(0){
1012  int x,y;
1013  for(y=0; y<mbHeight; y++){
1014  for(x=0; x<mbWidth; x++){
1015  av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1016  }
1017  av_log(c, AV_LOG_INFO, "\n");
1018  }
1019  av_log(c, AV_LOG_INFO, "\n");
1020  }
1021 
1022  if((pict_type&7)!=3){
1023  if (QPStride >= 0){
1024  int i;
1025  const int count= FFMAX(mbHeight * QPStride, mbWidth);
1026  for(i=0; i<(count>>2); i++){
1027  ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1028  }
1029  for(i<<=2; i<count; i++){
1030  c->nonBQPTable[i] = QP_store[i] & 0x3F;
1031  }
1032  } else {
1033  int i,j;
1034  for(i=0; i<mbHeight; i++) {
1035  for(j=0; j<absQPStride; j++) {
1036  c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1037  }
1038  }
1039  }
1040  }
1041 
1042  av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1043  mode->lumMode, mode->chromMode);
1044 
1045  postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1046  width, height, QP_store, QPStride, 0, mode, c);
1047 
1048  if (!(src[1] && src[2] && dst[1] && dst[2]))
1049  return;
1050 
1051  width = (width )>>c->hChromaSubSample;
1052  height = (height)>>c->vChromaSubSample;
1053 
1054  if(mode->chromMode){
1055  postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1056  width, height, QP_store, QPStride, 1, mode, c);
1057  postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1058  width, height, QP_store, QPStride, 2, mode, c);
1059  }
1060  else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1061  linecpy(dst[1], src[1], height, srcStride[1]);
1062  linecpy(dst[2], src[2], height, srcStride[2]);
1063  }else{
1064  int y;
1065  for(y=0; y<height; y++){
1066  memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1067  memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1068  }
1069  }
1070 }