FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
postprocess.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3  *
4  * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 /**
24  * @file
25  * postprocessing.
26  */
27 
28 /*
29  C MMX MMX2 3DNow AltiVec
30 isVertDC Ec Ec Ec
31 isVertMinMaxOk Ec Ec Ec
32 doVertLowPass E e e Ec
33 doVertDefFilter Ec Ec e e Ec
34 isHorizDC Ec Ec Ec
35 isHorizMinMaxOk a E Ec
36 doHorizLowPass E e e Ec
37 doHorizDefFilter Ec Ec e e Ec
38 do_a_deblock Ec E Ec E
39 deRing E e e* Ecp
40 Vertical RKAlgo1 E a a
41 Horizontal RKAlgo1 a a
42 Vertical X1# a E E
43 Horizontal X1# a E E
44 LinIpolDeinterlace e E E*
45 CubicIpolDeinterlace a e e*
46 LinBlendDeinterlace e E E*
47 MedianDeinterlace# E Ec Ec
48 TempDeNoiser# E e e Ec
49 
50 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51 # more or less selfinvented filters so the exactness is not too meaningful
52 E = Exact implementation
53 e = almost exact implementation (slightly different rounding,...)
54 a = alternative / approximate impl
55 c = checked against the other implementations (-vo md5)
56 p = partially optimized, still some work to do
57 */
58 
59 /*
60 TODO:
61 reduce the time wasted on the mem transfer
62 unroll stuff if instructions depend too much on the prior one
63 move YScale thing to the end instead of fixing QP
64 write a faster and higher quality deblocking filter :)
65 make the mainloop more flexible (variable number of blocks at once;
66  the if/else stuff per block is slowing things down)
67 compare the quality & speed of all filters
68 split this huge file
69 optimize c versions
70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71 ...
72 */
73 
74 //Changelog: use git log
75 
76 #include "config.h"
77 #include "libavutil/avutil.h"
78 #include "libavutil/avassert.h"
79 #include <inttypes.h>
80 #include <stdio.h>
81 #include <stdlib.h>
82 #include <string.h>
83 //#undef HAVE_MMXEXT_INLINE
84 //#define HAVE_AMD3DNOW_INLINE
85 //#undef HAVE_MMX_INLINE
86 //#undef ARCH_X86
87 //#define DEBUG_BRIGHTNESS
88 #include "postprocess.h"
89 #include "postprocess_internal.h"
90 #include "libavutil/avstring.h"
91 
92 unsigned postproc_version(void)
93 {
96 }
97 
98 const char *postproc_configuration(void)
99 {
100  return FFMPEG_CONFIGURATION;
101 }
102 
103 const char *postproc_license(void)
104 {
105 #define LICENSE_PREFIX "libpostproc license: "
106  return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
107 }
108 
109 #if HAVE_ALTIVEC_H
110 #include <altivec.h>
111 #endif
112 
/* Size in bytes of the scratch buffer used while parsing a mode string. */
113 #define GET_MODE_BUFFER_SIZE 500
/* Capacity of the per-filter array that collects unrecognized options. */
114 #define OPTIONS_ARRAY_SIZE 10
/* Number of rows the C block filters in this file iterate over. */
115 #define BLOCK_SIZE 8
/* NOTE(review): not referenced in this file's visible code; presumably used by the included templates. */
116 #define TEMP_STRIDE 8
117 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
118 
/*
 * 64-bit constants for the x86 inline assembly code.
 * wNN: the 16-bit value 0xNN repeated in all four words.
 * bNN: the 8-bit value 0xNN repeated in all eight bytes.
 */
119 #if ARCH_X86 && HAVE_INLINE_ASM
120 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
121 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
122 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
123 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
124 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
125 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
126 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
127 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
128 #endif
129 
/* Declared for all architectures; NOTE(review): presumably consumed by the dering code in the included templates. */
130 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
131 
132 
/*
 * Table of all filters that can be named in a mode string.
 * The parser matches a filter name against shortName and longName and then
 * uses chromDefault, minLumQuality, minChromQuality and mask of the entry.
 * NOTE(review): the initializer order is presumably
 * {shortName, longName, chromDefault, minLumQuality, minChromQuality, mask};
 * confirm against the struct PPFilter declaration.
 * The list ends at the entry whose shortName is NULL.
 */
133 static const struct PPFilter filters[]=
134 {
135  {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
136  {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
137 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
138  {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
139  {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
140  {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
141  {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
142  {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
143  {"dr", "dering", 1, 5, 6, DERING},
144  {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
145  {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
146  {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
147  {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
148  {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
149  {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
150  {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
151  {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
152  {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
153  {"be", "bitexact", 1, 0, 0, BITEXACT},
154  {NULL, NULL,0,0,0,0} //End Marker
155 };
156 
/*
 * Alias table for the mode string parser, stored as consecutive
 * (alias, expansion) pairs: entry 2*i is the alias name and entry 2*i+1 is
 * the filter list that is spliced into the string in its place.
 * A NULL alias terminates the table.
 */
157 static const char * const replaceTable[]=
158 {
159  "default", "hb:a,vb:a,dr:a",
160  "de", "hb:a,vb:a,dr:a",
161  "fast", "h1:a,v1:a,dr:a",
162  "fa", "h1:a,v1:a,dr:a",
163  "ac", "ha:a:128:7,va:a,dr:a",
164  NULL //End Marker
165 };
166 
167 
168 #if ARCH_X86 && HAVE_INLINE_ASM
/** Issue a "prefetchnta" instruction for the memory at address p. */
169 static inline void prefetchnta(void *p)
170 {
171  __asm__ volatile( "prefetchnta (%0)\n\t"
172  : : "r" (p)
173  );
174 }
175 
/** Issue a "prefetcht0" instruction for the memory at address p. */
176 static inline void prefetcht0(void *p)
177 {
178  __asm__ volatile( "prefetcht0 (%0)\n\t"
179  : : "r" (p)
180  );
181 }
182 
/** Issue a "prefetcht1" instruction for the memory at address p. */
183 static inline void prefetcht1(void *p)
184 {
185  __asm__ volatile( "prefetcht1 (%0)\n\t"
186  : : "r" (p)
187  );
188 }
189 
/** Issue a "prefetcht2" instruction for the memory at address p. */
190 static inline void prefetcht2(void *p)
191 {
192  __asm__ volatile( "prefetcht2 (%0)\n\t"
193  : : "r" (p)
194  );
195 }
196 #endif
197 
198 /* The horizontal functions exist only in C because the MMX
199  * code is faster with vertical filters and transposing. */
200 
201 /**
202  * Check if the given 8x8 Block is mostly "flat"
203  */
204 static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c)
205 {
206  int numEq= 0;
207  int y;
208  const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
209  const int dcThreshold= dcOffset*2 + 1;
210 
211  for(y=0; y<BLOCK_SIZE; y++){
212  numEq += ((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold;
213  numEq += ((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold;
214  numEq += ((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold;
215  numEq += ((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold;
216  numEq += ((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold;
217  numEq += ((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold;
218  numEq += ((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold;
219  src+= stride;
220  }
221  return numEq > c->ppMode.flatnessThreshold;
222 }
223 
224 /**
225  * Check if the middle 8x8 Block in the given 8x16 block is flat
226  */
227 static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c)
228 {
229  int numEq= 0;
230  int y;
231  const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
232  const int dcThreshold= dcOffset*2 + 1;
233 
234  src+= stride*4; // src points to begin of the 8x8 Block
235  for(y=0; y<BLOCK_SIZE-1; y++){
236  numEq += ((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold;
237  numEq += ((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold;
238  numEq += ((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold;
239  numEq += ((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold;
240  numEq += ((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold;
241  numEq += ((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold;
242  numEq += ((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold;
243  numEq += ((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold;
244  src+= stride;
245  }
246  return numEq > c->ppMode.flatnessThreshold;
247 }
248 
/**
 * Sparse horizontal range check over eight rows.
 *
 * One pixel pair is sampled per row, cycling through the column pairs
 * (0,5), (2,7), (4,1), (6,3). The check fails as soon as a sampled
 * difference falls outside [-2*QP, 2*QP].
 *
 * @return 1 if all eight sampled differences are in range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
{
    static const uint8_t columnPair[4][2] = {
        { 0, 5 }, { 2, 7 }, { 4, 1 }, { 6, 3 },
    };
    const uint8_t *row = src;
    int line;

    for (line = 0; line < 8; line++, row += stride) {
        const int first  = columnPair[line & 3][0];
        const int second = columnPair[line & 3][1];

        if ((unsigned)(row[first] - row[second] + 2*QP) > 4*QP)
            return 0;
    }
    return 1;
}
264 
265 static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP)
266 {
267  int x;
268  src+= stride*4;
269  for(x=0; x<BLOCK_SIZE; x+=4){
270  if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
271  if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
272  if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
273  if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
274  }
275  return 1;
276 }
277 
278 static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c)
279 {
280  if( isHorizDC_C(src, stride, c) ){
281  return isHorizMinMaxOk_C(src, stride, c->QP);
282  }else{
283  return 2;
284  }
285 }
286 
287 static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c)
288 {
289  if( isVertDC_C(src, stride, c) ){
290  return isVertMinMaxOk_C(src, stride, c->QP);
291  }else{
292  return 2;
293  }
294 }
295 
296 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c)
297 {
298  int y;
299  for(y=0; y<BLOCK_SIZE; y++){
300  const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
301 
302  if(FFABS(middleEnergy) < 8*c->QP){
303  const int q=(dst[3] - dst[4])/2;
304  const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
305  const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
306 
307  int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
308  d= FFMAX(d, 0);
309 
310  d= (5*d + 32) >> 6;
311  d*= FFSIGN(-middleEnergy);
312 
313  if(q>0)
314  {
315  d = FFMAX(d, 0);
316  d = FFMIN(d, q);
317  }
318  else
319  {
320  d = FFMIN(d, 0);
321  d = FFMAX(d, q);
322  }
323 
324  dst[3]-= d;
325  dst[4]+= d;
326  }
327  dst+= stride;
328  }
329 }
330 
331 /**
332  * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
333  * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
334  */
335 static inline void doHorizLowPass_C(uint8_t dst[], int stride, const PPContext *c)
336 {
337  int y;
338  for(y=0; y<BLOCK_SIZE; y++){
339  const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
340  const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
341 
342  int sums[10];
343  sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
344  sums[1] = sums[0] - first + dst[3];
345  sums[2] = sums[1] - first + dst[4];
346  sums[3] = sums[2] - first + dst[5];
347  sums[4] = sums[3] - first + dst[6];
348  sums[5] = sums[4] - dst[0] + dst[7];
349  sums[6] = sums[5] - dst[1] + last;
350  sums[7] = sums[6] - dst[2] + last;
351  sums[8] = sums[7] - dst[3] + last;
352  sums[9] = sums[8] - dst[4] + last;
353 
354  dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
355  dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
356  dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
357  dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
358  dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
359  dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
360  dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
361  dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
362 
363  dst+= stride;
364  }
365 }
366 
367 /**
368  * Experimental Filter 1 (Horizontal)
369  * will not damage linear gradients
370  * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
371  * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
372  * MMX2 version does correct clipping C version does not
373  * not identical with the vertical one
374  */
375 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
376 {
377  int y;
378  static uint64_t lut[256];
379  if(!lut[255])
380  {
381  int i;
382  for(i=0; i<256; i++)
383  {
384  int v= i < 128 ? 2*i : 2*(i-256);
385 /*
386 //Simulate 112242211 9-Tap filter
387  uint64_t a= (v/16) & 0xFF;
388  uint64_t b= (v/8) & 0xFF;
389  uint64_t c= (v/4) & 0xFF;
390  uint64_t d= (3*v/8) & 0xFF;
391 */
392 //Simulate piecewise linear interpolation
393  uint64_t a= (v/16) & 0xFF;
394  uint64_t b= (v*3/16) & 0xFF;
395  uint64_t c= (v*5/16) & 0xFF;
396  uint64_t d= (7*v/16) & 0xFF;
397  uint64_t A= (0x100 - a)&0xFF;
398  uint64_t B= (0x100 - b)&0xFF;
399  uint64_t C= (0x100 - c)&0xFF;
400  uint64_t D= (0x100 - c)&0xFF;
401 
402  lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
403  (D<<24) | (C<<16) | (B<<8) | (A);
404  //lut[i] = (v<<32) | (v<<24);
405  }
406  }
407 
408  for(y=0; y<BLOCK_SIZE; y++){
409  int a= src[1] - src[2];
410  int b= src[3] - src[4];
411  int c= src[5] - src[6];
412 
413  int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
414 
415  if(d < QP){
416  int v = d * FFSIGN(-b);
417 
418  src[1] +=v/8;
419  src[2] +=v/4;
420  src[3] +=3*v/8;
421  src[4] -=3*v/8;
422  src[5] -=v/4;
423  src[6] -=v/8;
424  }
425  src+=stride;
426  }
427 }
428 
429 /**
430  * accurate deblock filter
431  */
433  int stride, const PPContext *c)
434 {
435  int y;
436  const int QP= c->QP;
437  const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
438  const int dcThreshold= dcOffset*2 + 1;
439 //START_TIMER
440  src+= step*4; // src points to begin of the 8x8 Block
441  for(y=0; y<8; y++){
442  int numEq= 0;
443 
444  numEq += ((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold;
445  numEq += ((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold;
446  numEq += ((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold;
447  numEq += ((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold;
448  numEq += ((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold;
449  numEq += ((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold;
450  numEq += ((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold;
451  numEq += ((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold;
452  numEq += ((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold;
453  if(numEq > c->ppMode.flatnessThreshold){
454  int min, max, x;
455 
456  if(src[0] > src[step]){
457  max= src[0];
458  min= src[step];
459  }else{
460  max= src[step];
461  min= src[0];
462  }
463  for(x=2; x<8; x+=2){
464  if(src[x*step] > src[(x+1)*step]){
465  if(src[x *step] > max) max= src[ x *step];
466  if(src[(x+1)*step] < min) min= src[(x+1)*step];
467  }else{
468  if(src[(x+1)*step] > max) max= src[(x+1)*step];
469  if(src[ x *step] < min) min= src[ x *step];
470  }
471  }
472  if(max-min < 2*QP){
473  const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
474  const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
475 
476  int sums[10];
477  sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
478  sums[1] = sums[0] - first + src[3*step];
479  sums[2] = sums[1] - first + src[4*step];
480  sums[3] = sums[2] - first + src[5*step];
481  sums[4] = sums[3] - first + src[6*step];
482  sums[5] = sums[4] - src[0*step] + src[7*step];
483  sums[6] = sums[5] - src[1*step] + last;
484  sums[7] = sums[6] - src[2*step] + last;
485  sums[8] = sums[7] - src[3*step] + last;
486  sums[9] = sums[8] - src[4*step] + last;
487 
488  src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
489  src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
490  src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
491  src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
492  src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
493  src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
494  src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
495  src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
496  }
497  }else{
498  const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
499 
500  if(FFABS(middleEnergy) < 8*QP){
501  const int q=(src[3*step] - src[4*step])/2;
502  const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
503  const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
504 
505  int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
506  d= FFMAX(d, 0);
507 
508  d= (5*d + 32) >> 6;
509  d*= FFSIGN(-middleEnergy);
510 
511  if(q>0){
512  d = FFMAX(d, 0);
513  d = FFMIN(d, q);
514  }else{
515  d = FFMIN(d, 0);
516  d = FFMAX(d, q);
517  }
518 
519  src[3*step]-= d;
520  src[4*step]+= d;
521  }
522  }
523 
524  src += stride;
525  }
526 /*if(step==16){
527  STOP_TIMER("step16")
528 }else{
529  STOP_TIMER("stepX")
530 }*/
531 }
532 
533 //Note: we have C, MMX, MMX2 and 3DNOW versions; there is no combined 3DNOW+MMX2 one
534 //Plain C versions
535 //We always compile the C version for testing, which needs bit-exactness
536 #define TEMPLATE_PP_C 1
537 #include "postprocess_template.c"
538 
539 #if HAVE_ALTIVEC
540 # define TEMPLATE_PP_ALTIVEC 1
542 # include "postprocess_template.c"
543 #endif
544 
545 #if ARCH_X86 && HAVE_INLINE_ASM
546 # if CONFIG_RUNTIME_CPUDETECT
547 # define TEMPLATE_PP_MMX 1
548 # include "postprocess_template.c"
549 # define TEMPLATE_PP_MMXEXT 1
550 # include "postprocess_template.c"
551 # define TEMPLATE_PP_3DNOW 1
552 # include "postprocess_template.c"
553 # define TEMPLATE_PP_SSE2 1
554 # include "postprocess_template.c"
555 # else
556 # if HAVE_SSE2_INLINE
557 # define TEMPLATE_PP_SSE2 1
558 # include "postprocess_template.c"
559 # elif HAVE_MMXEXT_INLINE
560 # define TEMPLATE_PP_MMXEXT 1
561 # include "postprocess_template.c"
562 # elif HAVE_AMD3DNOW_INLINE
563 # define TEMPLATE_PP_3DNOW 1
564 # include "postprocess_template.c"
565 # elif HAVE_MMX_INLINE
566 # define TEMPLATE_PP_MMX 1
567 # include "postprocess_template.c"
568 # endif
569 # endif
570 #endif
571 
/* Common signature of the postProcess_* implementations that postProcess() selects between. */
572 typedef void (*pp_fn)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
573  const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2);
574 
575 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
576  const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
577 {
578  pp_fn pp = postProcess_C;
579  PPContext *c= (PPContext *)vc;
580  PPMode *ppMode= (PPMode *)vm;
581  c->ppMode= *ppMode; //FIXME
582 
583  if (!(ppMode->lumMode & BITEXACT)) {
584 #if CONFIG_RUNTIME_CPUDETECT
585 #if ARCH_X86 && HAVE_INLINE_ASM
586  // ordered per speed fastest first
587  if (c->cpuCaps & AV_CPU_FLAG_SSE2) pp = postProcess_SSE2;
588  else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT) pp = postProcess_MMX2;
589  else if (c->cpuCaps & AV_CPU_FLAG_3DNOW) pp = postProcess_3DNow;
590  else if (c->cpuCaps & AV_CPU_FLAG_MMX) pp = postProcess_MMX;
591 #elif HAVE_ALTIVEC
592  if (c->cpuCaps & AV_CPU_FLAG_ALTIVEC) pp = postProcess_altivec;
593 #endif
594 #else /* CONFIG_RUNTIME_CPUDETECT */
595 #if HAVE_SSE2_INLINE
596  pp = postProcess_SSE2;
597 #elif HAVE_MMXEXT_INLINE
598  pp = postProcess_MMX2;
599 #elif HAVE_AMD3DNOW_INLINE
600  pp = postProcess_3DNow;
601 #elif HAVE_MMX_INLINE
602  pp = postProcess_MMX;
603 #elif HAVE_ALTIVEC
604  pp = postProcess_altivec;
605 #endif
606 #endif /* !CONFIG_RUNTIME_CPUDETECT */
607  }
608 
609  pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
610 }
611 
/* -pp Command line Help
 *
 * The long names listed for "ha" and "va" are the ones the filter table
 * accepts ("ahdeblock" / "avdeblock").
 */
const char pp_help[] =
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forceQuant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
657 
658 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
659 {
661  char *p= temp;
662  static const char filterDelimiters[] = ",/";
663  static const char optionDelimiters[] = ":|";
664  struct PPMode *ppMode;
665  char *filterToken;
666 
667  if (!name) {
668  av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n");
669  return NULL;
670  }
671 
672  if (!strcmp(name, "help")) {
673  const char *p;
674  for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
675  av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
676  av_log(NULL, AV_LOG_INFO, "%s", temp);
677  }
678  return NULL;
679  }
680 
681  ppMode= av_malloc(sizeof(PPMode));
682 
683  ppMode->lumMode= 0;
684  ppMode->chromMode= 0;
685  ppMode->maxTmpNoise[0]= 700;
686  ppMode->maxTmpNoise[1]= 1500;
687  ppMode->maxTmpNoise[2]= 3000;
688  ppMode->maxAllowedY= 234;
689  ppMode->minAllowedY= 16;
690  ppMode->baseDcDiff= 256/8;
691  ppMode->flatnessThreshold= 56-16-1;
692  ppMode->maxClippedThreshold= 0.01;
693  ppMode->error=0;
694 
695  memset(temp, 0, GET_MODE_BUFFER_SIZE);
696  av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
697 
698  av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
699 
700  for(;;){
701  const char *filterName;
702  int q= 1000000; //PP_QUALITY_MAX;
703  int chrom=-1;
704  int luma=-1;
705  const char *option;
706  const char *options[OPTIONS_ARRAY_SIZE];
707  int i;
708  int filterNameOk=0;
709  int numOfUnknownOptions=0;
710  int enable=1; //does the user want us to enabled or disabled the filter
711  char *tokstate;
712 
713  filterToken= av_strtok(p, filterDelimiters, &tokstate);
714  if(!filterToken) break;
715  p+= strlen(filterToken) + 1; // p points to next filterToken
716  filterName= av_strtok(filterToken, optionDelimiters, &tokstate);
717  if (!filterName) {
718  ppMode->error++;
719  break;
720  }
721  av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
722 
723  if(*filterName == '-'){
724  enable=0;
725  filterName++;
726  }
727 
728  for(;;){ //for all options
729  option= av_strtok(NULL, optionDelimiters, &tokstate);
730  if(!option) break;
731 
732  av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
733  if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
734  else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
735  else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
736  else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
737  else{
738  options[numOfUnknownOptions] = option;
739  numOfUnknownOptions++;
740  }
741  if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
742  }
743  options[numOfUnknownOptions] = NULL;
744 
745  /* replace stuff from the replace Table */
746  for(i=0; replaceTable[2*i]; i++){
747  if(!strcmp(replaceTable[2*i], filterName)){
748  int newlen= strlen(replaceTable[2*i + 1]);
749  int plen;
750  int spaceLeft;
751 
752  p--, *p=',';
753 
754  plen= strlen(p);
755  spaceLeft= p - temp + plen;
756  if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE - 1){
757  ppMode->error++;
758  break;
759  }
760  memmove(p + newlen, p, plen+1);
761  memcpy(p, replaceTable[2*i + 1], newlen);
762  filterNameOk=1;
763  }
764  }
765 
766  for(i=0; filters[i].shortName; i++){
767  if( !strcmp(filters[i].longName, filterName)
768  || !strcmp(filters[i].shortName, filterName)){
769  ppMode->lumMode &= ~filters[i].mask;
770  ppMode->chromMode &= ~filters[i].mask;
771 
772  filterNameOk=1;
773  if(!enable) break; // user wants to disable it
774 
775  if(q >= filters[i].minLumQuality && luma)
776  ppMode->lumMode|= filters[i].mask;
777  if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
778  if(q >= filters[i].minChromQuality)
779  ppMode->chromMode|= filters[i].mask;
780 
781  if(filters[i].mask == LEVEL_FIX){
782  int o;
783  ppMode->minAllowedY= 16;
784  ppMode->maxAllowedY= 234;
785  for(o=0; options[o]; o++){
786  if( !strcmp(options[o],"fullyrange")
787  ||!strcmp(options[o],"f")){
788  ppMode->minAllowedY= 0;
789  ppMode->maxAllowedY= 255;
790  numOfUnknownOptions--;
791  }
792  }
793  }
794  else if(filters[i].mask == TEMP_NOISE_FILTER)
795  {
796  int o;
797  int numOfNoises=0;
798 
799  for(o=0; options[o]; o++){
800  char *tail;
801  ppMode->maxTmpNoise[numOfNoises]=
802  strtol(options[o], &tail, 0);
803  if(tail!=options[o]){
804  numOfNoises++;
805  numOfUnknownOptions--;
806  if(numOfNoises >= 3) break;
807  }
808  }
809  }
810  else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
811  || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
812  int o;
813 
814  for(o=0; options[o] && o<2; o++){
815  char *tail;
816  int val= strtol(options[o], &tail, 0);
817  if(tail==options[o]) break;
818 
819  numOfUnknownOptions--;
820  if(o==0) ppMode->baseDcDiff= val;
821  else ppMode->flatnessThreshold= val;
822  }
823  }
824  else if(filters[i].mask == FORCE_QUANT){
825  int o;
826  ppMode->forcedQuant= 15;
827 
828  for(o=0; options[o] && o<1; o++){
829  char *tail;
830  int val= strtol(options[o], &tail, 0);
831  if(tail==options[o]) break;
832 
833  numOfUnknownOptions--;
834  ppMode->forcedQuant= val;
835  }
836  }
837  }
838  }
839  if(!filterNameOk) ppMode->error++;
840  ppMode->error += numOfUnknownOptions;
841  }
842 
843  av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
844  if(ppMode->error){
845  av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
846  av_free(ppMode);
847  return NULL;
848  }
849  return ppMode;
850 }
851 
853  av_free(mode);
854 }
855 
/**
 * Replace the buffer *p by a new zero-initialized one of the given size.
 * The old contents are not preserved; the old buffer is freed before the
 * new one is allocated.
 */
static void reallocAlign(void **p, int size)
{
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p    = fresh;
}
860 
861 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
862  int mbWidth = (width+15)>>4;
863  int mbHeight= (height+15)>>4;
864  int i;
865 
866  c->stride= stride;
867  c->qpStride= qpStride;
868 
869  reallocAlign((void **)&c->tempDst, stride*24+32);
870  reallocAlign((void **)&c->tempSrc, stride*24);
871  reallocAlign((void **)&c->tempBlocks, 2*16*8);
872  reallocAlign((void **)&c->yHistogram, 256*sizeof(uint64_t));
873  for(i=0; i<256; i++)
874  c->yHistogram[i]= width*height/64*15/256;
875 
876  for(i=0; i<3; i++){
877  //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
878  reallocAlign((void **)&c->tempBlurred[i], stride*mbHeight*16 + 17*1024);
879  reallocAlign((void **)&c->tempBlurredPast[i], 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
880  }
881 
882  reallocAlign((void **)&c->deintTemp, 2*width+32);
883  reallocAlign((void **)&c->nonBQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
884  reallocAlign((void **)&c->stdQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
885  reallocAlign((void **)&c->forcedQPTable, mbWidth*sizeof(QP_STORE_T));
886 }
887 
/**
 * Name callback for the AVClass of the postprocessing context.
 * The context pointer is not inspected; the name is always "postproc".
 */
static const char *context_to_name(void *ptr)
{
    static const char label[] = "postproc";

    (void)ptr;
    return label;
}
891 
/* AVClass describing the postprocessing context: class name "Postproc", item name callback context_to_name, no further entries. */
892 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
893 
894 pp_context *pp_get_context(int width, int height, int cpuCaps){
895  PPContext *c= av_malloc(sizeof(PPContext));
896  int stride= FFALIGN(width, 16); //assumed / will realloc if needed
897  int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
898 
899  memset(c, 0, sizeof(PPContext));
901  if(cpuCaps&PP_FORMAT){
902  c->hChromaSubSample= cpuCaps&0x3;
903  c->vChromaSubSample= (cpuCaps>>4)&0x3;
904  }else{
905  c->hChromaSubSample= 1;
906  c->vChromaSubSample= 1;
907  }
908  if (cpuCaps & PP_CPU_CAPS_AUTO) {
909  c->cpuCaps = av_get_cpu_flags();
910  } else {
911  c->cpuCaps = 0;
912  if (cpuCaps & PP_CPU_CAPS_MMX) c->cpuCaps |= AV_CPU_FLAG_MMX;
913  if (cpuCaps & PP_CPU_CAPS_MMX2) c->cpuCaps |= AV_CPU_FLAG_MMXEXT;
914  if (cpuCaps & PP_CPU_CAPS_3DNOW) c->cpuCaps |= AV_CPU_FLAG_3DNOW;
915  if (cpuCaps & PP_CPU_CAPS_ALTIVEC) c->cpuCaps |= AV_CPU_FLAG_ALTIVEC;
916  }
917 
918  reallocBuffers(c, width, height, stride, qpStride);
919 
920  c->frameNum=-1;
921 
922  return c;
923 }
924 
925 void pp_free_context(void *vc){
926  PPContext *c = (PPContext*)vc;
927  int i;
928 
929  for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurred); i++)
930  av_free(c->tempBlurred[i]);
931  for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurredPast); i++)
932  av_free(c->tempBlurredPast[i]);
933 
934  av_free(c->tempBlocks);
935  av_free(c->yHistogram);
936  av_free(c->tempDst);
937  av_free(c->tempSrc);
938  av_free(c->deintTemp);
939  av_free(c->stdQPTable);
940  av_free(c->nonBQPTable);
942 
943  memset(c, 0, sizeof(PPContext));
944 
945  av_free(c);
946 }
947 
/**
 * Postprocess one picture consisting of three planes.
 *
 * Plane 0 is always run through postProcess(). Planes 1 and 2 are run
 * through postProcess() if mode->chromMode is nonzero and are copied from
 * src to dst otherwise. The work buffers of the context are reallocated
 * first if the strides passed in exceed the ones the context was sized for.
 *
 * @param src, srcStride  source planes and their line sizes
 * @param dst, dstStride  destination planes and their line sizes
 * @param width, height   size of plane 0; planes 1 and 2 use this size
 *                        shifted right by the context's chroma subsampling
 * @param QP_store        quantizer table, may be NULL
 * @param QPStride        line size of QP_store, may be negative
 * @param vm              mode (a PPMode)
 * @param vc              context (a PPContext)
 * @param pict_type       picture type, optionally combined with
 *                        PP_PICT_TYPE_QP2
 */
948 void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
949  uint8_t * dst[3], const int dstStride[3],
950  int width, int height,
951  const QP_STORE_T *QP_store, int QPStride,
952  pp_mode *vm, void *vc, int pict_type)
953 {
/* picture size in units of 16x16, rounded up */
954  int mbWidth = (width+15)>>4;
955  int mbHeight= (height+15)>>4;
956  PPMode *mode = vm;
957  PPContext *c = vc;
958  int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
959  int absQPStride = FFABS(QPStride);
960 
961  // c->stride and c->QPStride are always positive
962  if(c->stride < minStride || c->qpStride < absQPStride)
963  reallocBuffers(c, width, height,
964  FFMAX(minStride, c->stride),
965  FFMAX(c->qpStride, absQPStride));
966 
/* Without a QP table, or with FORCE_QUANT, use a single table row (stride 0)
 * filled with the forced quantizer or with 1. */
967  if(!QP_store || (mode->lumMode & FORCE_QUANT)){
968  int i;
969  QP_store= c->forcedQPTable;
970  absQPStride = QPStride = 0;
971  if(mode->lumMode & FORCE_QUANT)
972  for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
973  else
974  for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
975  }
976 
/* PP_PICT_TYPE_QP2: store the halved QP values in stdQPTable and use that
 * table from here on. Four entries are handled per 32-bit word, the rest
 * one by one.
 * NOTE(review): the word loop assumes one-byte QP_STORE_T entries - confirm. */
977  if(pict_type & PP_PICT_TYPE_QP2){
978  int i;
979  const int count= mbHeight * absQPStride;
980  for(i=0; i<(count>>2); i++){
981  ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
982  }
983  for(i<<=2; i<count; i++){
984  c->stdQPTable[i] = QP_store[i]>>1;
985  }
986  QP_store= c->stdQPTable;
987  QPStride= absQPStride;
988  }
989 
/* disabled debug dump of the QP table */
990  if(0){
991  int x,y;
992  for(y=0; y<mbHeight; y++){
993  for(x=0; x<mbWidth; x++){
994  av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
995  }
996  av_log(c, AV_LOG_INFO, "\n");
997  }
998  av_log(c, AV_LOG_INFO, "\n");
999  }
1000 
/* Unless the low three bits of pict_type equal 3, keep a copy of the QP
 * values masked with 0x3F in nonBQPTable.
 * NOTE(review): 3 is presumably the B-frame picture type - confirm. */
1001  if((pict_type&7)!=3){
1002  if (QPStride >= 0){
1003  int i;
1004  const int count= mbHeight * QPStride;
1005  for(i=0; i<(count>>2); i++){
1006  ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1007  }
1008  for(i<<=2; i<count; i++){
1009  c->nonBQPTable[i] = QP_store[i] & 0x3F;
1010  }
1011  } else {
/* negative stride: copy row by row, the copy is stored with the positive stride */
1012  int i,j;
1013  for(i=0; i<mbHeight; i++) {
1014  for(j=0; j<absQPStride; j++) {
1015  c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1016  }
1017  }
1018  }
1019  }
1020 
1021  av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1022  mode->lumMode, mode->chromMode);
1023 
/* plane 0 */
1024  postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1025  width, height, QP_store, QPStride, 0, mode, c);
1026 
/* size of planes 1 and 2 */
1027  width = (width )>>c->hChromaSubSample;
1028  height = (height)>>c->vChromaSubSample;
1029 
1030  if(mode->chromMode){
1031  postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1032  width, height, QP_store, QPStride, 1, mode, c);
1033  postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1034  width, height, QP_store, QPStride, 2, mode, c);
1035  }
/* no chroma filtering: copy planes 1 and 2, via linecpy() when source and
 * destination strides match, line by line otherwise */
1036  else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1037  linecpy(dst[1], src[1], height, srcStride[1]);
1038  linecpy(dst[2], src[2], height, srcStride[2]);
1039  }else{
1040  int y;
1041  for(y=0; y<height; y++){
1042  memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1043  memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1044  }
1045  }
1046 }