FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
postprocess.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3  *
4  * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 /**
24  * @file
25  * postprocessing.
26  */
27 
28 /*
29  C MMX MMX2 3DNow AltiVec
30 isVertDC Ec Ec Ec
31 isVertMinMaxOk Ec Ec Ec
32 doVertLowPass E e e Ec
33 doVertDefFilter Ec Ec e e Ec
34 isHorizDC Ec Ec Ec
35 isHorizMinMaxOk a E Ec
36 doHorizLowPass E e e Ec
37 doHorizDefFilter Ec Ec e e Ec
38 do_a_deblock Ec E Ec E
39 deRing E e e* Ecp
40 Vertical RKAlgo1 E a a
41 Horizontal RKAlgo1 a a
42 Vertical X1# a E E
43 Horizontal X1# a E E
44 LinIpolDeinterlace e E E*
45 CubicIpolDeinterlace a e e*
46 LinBlendDeinterlace e E E*
47 MedianDeinterlace# E Ec Ec
48 TempDeNoiser# E e e Ec
49 
50 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51 # more or less selfinvented filters so the exactness is not too meaningful
52 E = Exact implementation
53 e = almost exact implementation (slightly different rounding,...)
54 a = alternative / approximate impl
55 c = checked against the other implementations (-vo md5)
56 p = partially optimized, still some work to do
57 */
58 
59 /*
60 TODO:
61 reduce the time wasted on the mem transfer
62 unroll stuff if instructions depend too much on the prior one
63 move YScale thing to the end instead of fixing QP
64 write a faster and higher quality deblocking filter :)
65 make the main loop more flexible (variable number of blocks at once;
66  the if/else stuff per block is slowing things down)
67 compare the quality & speed of all filters
68 split this huge file
69 optimize c versions
70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71 ...
72 */
73 
74 //Changelog: use git log
75 
76 #include "config.h"
77 #include "libavutil/avutil.h"
78 #include "libavutil/avassert.h"
79 #include <inttypes.h>
80 #include <stdio.h>
81 #include <stdlib.h>
82 #include <string.h>
83 //#undef HAVE_MMXEXT_INLINE
84 //#define HAVE_AMD3DNOW_INLINE
85 //#undef HAVE_MMX_INLINE
86 //#undef ARCH_X86
87 //#define DEBUG_BRIGHTNESS
88 #include "postprocess.h"
89 #include "postprocess_internal.h"
90 #include "libavutil/avstring.h"
91 
92 unsigned postproc_version(void)
93 {
96 }
97 
98 const char *postproc_configuration(void)
99 {
100  return FFMPEG_CONFIGURATION;
101 }
102 
103 const char *postproc_license(void)
104 {
105 #define LICENSE_PREFIX "libpostproc license: "
106  return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
107 }
108 
109 #if HAVE_ALTIVEC_H
110 #include <altivec.h>
111 #endif
112 
/* Size of the scratch buffer the mode string is copied into while it is parsed. */
113 #define GET_MODE_BUFFER_SIZE 500
/* Bound used when collecting the option tokens a filter entry did not recognize. */
114 #define OPTIONS_ARRAY_SIZE 10
/* Edge length, in pixels, of the blocks the C filters in this file loop over. */
115 #define BLOCK_SIZE 8
/* NOTE(review): not referenced in this part of the file; presumably used by the
 * included postprocess_template.c -- confirm. */
116 #define TEMP_STRIDE 8
117 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
118 
/* 64-bit constants that are only built for x86 with inline asm.
 * wNN repeats the 16-bit value NN (hex) four times, bNN repeats the byte NN
 * (hex) eight times.
 * NOTE(review): presumably consumed by the inline asm in the included
 * templates; no user is visible in this part of the file -- confirm. */
119 #if ARCH_X86 && HAVE_INLINE_ASM
120 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
121 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
122 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
123 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
124 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
125 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
126 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
127 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
128 #endif
129 
/* Threshold constant for the deringing filter (declared for all targets).
 * NOTE(review): its consumer is not visible in this part of the file. */
130 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
131 
132 
/**
 * Table of all filters that can be named in a mode string.
 *
 * pp_get_mode_by_name_and_quality() matches a filter name against the first
 * two strings of each entry (short and long name) and reads the entry's
 * chromDefault, minLumQuality, minChromQuality and mask members.
 * NOTE(review): the positional order of the four integers is assumed to be
 * chromDefault, minLumQuality, minChromQuality, mask -- confirm against the
 * struct PPFilter declaration.
 * The list is terminated by an entry whose names are NULL.
 */
133 static const struct PPFilter filters[]=
134 {
135  {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
136  {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
137 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
138  {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
139  {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
140  {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
141  {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
142  {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
143  {"dr", "dering", 1, 5, 6, DERING},
144  {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
145  {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
146  {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
147  {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
148  {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
149  {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
150  {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
151  {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
152  {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
153  {"be", "bitexact", 1, 0, 0, BITEXACT},
154  {NULL, NULL,0,0,0,0} //End Marker
155 };
156 
/**
 * Alias table used while parsing a mode string.
 *
 * Entries come in pairs: element 2*i is the alias name that is compared with
 * the filter name, element 2*i+1 is the filter list that is spliced into the
 * string being parsed in its place. A NULL at an even index ends the table.
 */
157 static const char *replaceTable[]=
158 {
159  "default", "hb:a,vb:a,dr:a",
160  "de", "hb:a,vb:a,dr:a",
161  "fast", "h1:a,v1:a,dr:a",
162  "fa", "h1:a,v1:a,dr:a",
163  "ac", "ha:a:128:7,va:a,dr:a",
164  NULL //End Marker
165 };
166 
167 
168 #if ARCH_X86 && HAVE_INLINE_ASM
/** Execute the x86 "prefetchnta" instruction on the address p. */
static inline void prefetchnta(void *p)
{
    __asm__ volatile("prefetchnta (%0)\n\t" : : "r" (p));
}
175 
/** Execute the x86 "prefetcht0" instruction on the address p. */
static inline void prefetcht0(void *p)
{
    __asm__ volatile("prefetcht0 (%0)\n\t" : : "r" (p));
}
182 
/** Execute the x86 "prefetcht1" instruction on the address p. */
static inline void prefetcht1(void *p)
{
    __asm__ volatile("prefetcht1 (%0)\n\t" : : "r" (p));
}
189 
/** Execute the x86 "prefetcht2" instruction on the address p. */
static inline void prefetcht2(void *p)
{
    __asm__ volatile("prefetcht2 (%0)\n\t" : : "r" (p));
}
196 #endif
197 
198 /* The horizontal functions exist only in C because the MMX
199  * code is faster with vertical filters and transposing. */
200 
201 /**
202  * Check if the given 8x8 Block is mostly "flat"
203  */
204 static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c)
205 {
206  int numEq= 0;
207  int y;
208  const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
209  const int dcThreshold= dcOffset*2 + 1;
210 
211  for(y=0; y<BLOCK_SIZE; y++){
212  if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
213  if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
214  if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
215  if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
216  if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
217  if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
218  if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
219  src+= stride;
220  }
221  return numEq > c->ppMode.flatnessThreshold;
222 }
223 
224 /**
225  * Check if the middle 8x8 Block in the given 8x16 block is flat
226  */
227 static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c)
228 {
229  int numEq= 0;
230  int y;
231  const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
232  const int dcThreshold= dcOffset*2 + 1;
233 
234  src+= stride*4; // src points to begin of the 8x8 Block
235  for(y=0; y<BLOCK_SIZE-1; y++){
236  if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
237  if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
238  if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
239  if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
240  if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
241  if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
242  if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
243  if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
244  src+= stride;
245  }
246  return numEq > c->ppMode.flatnessThreshold;
247 }
248 
/**
 * Sample-based range check over 8 rows of a block (horizontal direction).
 *
 * One pixel pair is tested per row; the column pairs cycle through
 * (0,5), (2,7), (4,1), (6,3). The check fails as soon as one tested
 * difference lies outside -2*QP .. 2*QP.
 *
 * @param src    first pixel of the block
 * @param stride offset between the starts of two consecutive rows
 * @param QP     quantizer the allowed range is derived from
 * @return 1 if all tested pairs are within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
{
    static const uint8_t columns[4][2] = { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    const uint8_t *row = src;
    int line;

    for (line = 0; line < 2*4; line++) {
        const uint8_t *pair = columns[line & 3];

        if ((unsigned)(row[pair[0]] - row[pair[1]] + 2*QP) > 4*QP)
            return 0;
        row += stride;
    }
    return 1;
}
264 
265 static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP)
266 {
267  int x;
268  src+= stride*4;
269  for(x=0; x<BLOCK_SIZE; x+=4){
270  if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
271  if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
272  if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
273  if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
274  }
275  return 1;
276 }
277 
278 static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c)
279 {
280  if( isHorizDC_C(src, stride, c) ){
281  if( isHorizMinMaxOk_C(src, stride, c->QP) )
282  return 1;
283  else
284  return 0;
285  }else{
286  return 2;
287  }
288 }
289 
290 static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c)
291 {
292  if( isVertDC_C(src, stride, c) ){
293  if( isVertMinMaxOk_C(src, stride, c->QP) )
294  return 1;
295  else
296  return 0;
297  }else{
298  return 2;
299  }
300 }
301 
302 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c)
303 {
304  int y;
305  for(y=0; y<BLOCK_SIZE; y++){
306  const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
307 
308  if(FFABS(middleEnergy) < 8*c->QP){
309  const int q=(dst[3] - dst[4])/2;
310  const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
311  const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
312 
313  int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
314  d= FFMAX(d, 0);
315 
316  d= (5*d + 32) >> 6;
317  d*= FFSIGN(-middleEnergy);
318 
319  if(q>0)
320  {
321  d= d<0 ? 0 : d;
322  d= d>q ? q : d;
323  }
324  else
325  {
326  d= d>0 ? 0 : d;
327  d= d<q ? q : d;
328  }
329 
330  dst[3]-= d;
331  dst[4]+= d;
332  }
333  dst+= stride;
334  }
335 }
336 
337 /**
338  * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
339  * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
340  */
341 static inline void doHorizLowPass_C(uint8_t dst[], int stride, const PPContext *c)
342 {
343  int y;
344  for(y=0; y<BLOCK_SIZE; y++){
345  const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
346  const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
347 
348  int sums[10];
349  sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
350  sums[1] = sums[0] - first + dst[3];
351  sums[2] = sums[1] - first + dst[4];
352  sums[3] = sums[2] - first + dst[5];
353  sums[4] = sums[3] - first + dst[6];
354  sums[5] = sums[4] - dst[0] + dst[7];
355  sums[6] = sums[5] - dst[1] + last;
356  sums[7] = sums[6] - dst[2] + last;
357  sums[8] = sums[7] - dst[3] + last;
358  sums[9] = sums[8] - dst[4] + last;
359 
360  dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
361  dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
362  dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
363  dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
364  dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
365  dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
366  dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
367  dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
368 
369  dst+= stride;
370  }
371 }
372 
373 /**
374  * Experimental Filter 1 (Horizontal)
375  * will not damage linear gradients
376  * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
377  * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
378  * MMX2 version does correct clipping C version does not
379  * not identical with the vertical one
380  */
381 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
382 {
383  int y;
384  static uint64_t lut[256];
385  if(!lut[255])
386  {
387  int i;
388  for(i=0; i<256; i++)
389  {
390  int v= i < 128 ? 2*i : 2*(i-256);
391 /*
392 //Simulate 112242211 9-Tap filter
393  uint64_t a= (v/16) & 0xFF;
394  uint64_t b= (v/8) & 0xFF;
395  uint64_t c= (v/4) & 0xFF;
396  uint64_t d= (3*v/8) & 0xFF;
397 */
398 //Simulate piecewise linear interpolation
399  uint64_t a= (v/16) & 0xFF;
400  uint64_t b= (v*3/16) & 0xFF;
401  uint64_t c= (v*5/16) & 0xFF;
402  uint64_t d= (7*v/16) & 0xFF;
403  uint64_t A= (0x100 - a)&0xFF;
404  uint64_t B= (0x100 - b)&0xFF;
405  uint64_t C= (0x100 - c)&0xFF;
406  uint64_t D= (0x100 - c)&0xFF;
407 
408  lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
409  (D<<24) | (C<<16) | (B<<8) | (A);
410  //lut[i] = (v<<32) | (v<<24);
411  }
412  }
413 
414  for(y=0; y<BLOCK_SIZE; y++){
415  int a= src[1] - src[2];
416  int b= src[3] - src[4];
417  int c= src[5] - src[6];
418 
419  int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
420 
421  if(d < QP){
422  int v = d * FFSIGN(-b);
423 
424  src[1] +=v/8;
425  src[2] +=v/4;
426  src[3] +=3*v/8;
427  src[4] -=3*v/8;
428  src[5] -=v/4;
429  src[6] -=v/8;
430  }
431  src+=stride;
432  }
433 }
434 
435 /**
436  * accurate deblock filter
437  */
439  int stride, const PPContext *c)
440 {
441  int y;
442  const int QP= c->QP;
443  const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
444  const int dcThreshold= dcOffset*2 + 1;
445 //START_TIMER
446  src+= step*4; // src points to begin of the 8x8 Block
447  for(y=0; y<8; y++){
448  int numEq= 0;
449 
450  if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
451  if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
452  if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
453  if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
454  if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
455  if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
456  if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
457  if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
458  if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
459  if(numEq > c->ppMode.flatnessThreshold){
460  int min, max, x;
461 
462  if(src[0] > src[step]){
463  max= src[0];
464  min= src[step];
465  }else{
466  max= src[step];
467  min= src[0];
468  }
469  for(x=2; x<8; x+=2){
470  if(src[x*step] > src[(x+1)*step]){
471  if(src[x *step] > max) max= src[ x *step];
472  if(src[(x+1)*step] < min) min= src[(x+1)*step];
473  }else{
474  if(src[(x+1)*step] > max) max= src[(x+1)*step];
475  if(src[ x *step] < min) min= src[ x *step];
476  }
477  }
478  if(max-min < 2*QP){
479  const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
480  const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
481 
482  int sums[10];
483  sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
484  sums[1] = sums[0] - first + src[3*step];
485  sums[2] = sums[1] - first + src[4*step];
486  sums[3] = sums[2] - first + src[5*step];
487  sums[4] = sums[3] - first + src[6*step];
488  sums[5] = sums[4] - src[0*step] + src[7*step];
489  sums[6] = sums[5] - src[1*step] + last;
490  sums[7] = sums[6] - src[2*step] + last;
491  sums[8] = sums[7] - src[3*step] + last;
492  sums[9] = sums[8] - src[4*step] + last;
493 
494  src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
495  src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
496  src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
497  src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
498  src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
499  src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
500  src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
501  src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
502  }
503  }else{
504  const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
505 
506  if(FFABS(middleEnergy) < 8*QP){
507  const int q=(src[3*step] - src[4*step])/2;
508  const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
509  const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
510 
511  int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
512  d= FFMAX(d, 0);
513 
514  d= (5*d + 32) >> 6;
515  d*= FFSIGN(-middleEnergy);
516 
517  if(q>0){
518  d= d<0 ? 0 : d;
519  d= d>q ? q : d;
520  }else{
521  d= d>0 ? 0 : d;
522  d= d<q ? q : d;
523  }
524 
525  src[3*step]-= d;
526  src[4*step]+= d;
527  }
528  }
529 
530  src += stride;
531  }
532 /*if(step==16){
533  STOP_TIMER("step16")
534 }else{
535  STOP_TIMER("stepX")
536 }*/
537 }
538 
539 //Note: there are C, MMX, MMX2 and 3DNow versions; there is no combined 3DNow+MMX2 one
540 //Plain C versions
541 //we always compile C for testing which needs bitexactness
542 #define TEMPLATE_PP_C 1
543 #include "postprocess_template.c"
544 
545 #if HAVE_ALTIVEC
546 # define TEMPLATE_PP_ALTIVEC 1
548 # include "postprocess_template.c"
549 #endif
550 
551 #if ARCH_X86 && HAVE_INLINE_ASM
552 # if CONFIG_RUNTIME_CPUDETECT
553 # define TEMPLATE_PP_MMX 1
554 # include "postprocess_template.c"
555 # define TEMPLATE_PP_MMXEXT 1
556 # include "postprocess_template.c"
557 # define TEMPLATE_PP_3DNOW 1
558 # include "postprocess_template.c"
559 # define TEMPLATE_PP_SSE2 1
560 # include "postprocess_template.c"
561 # else
562 # if HAVE_SSE2_INLINE
563 # define TEMPLATE_PP_SSE2 1
564 # include "postprocess_template.c"
565 # elif HAVE_MMXEXT_INLINE
566 # define TEMPLATE_PP_MMXEXT 1
567 # include "postprocess_template.c"
568 # elif HAVE_AMD3DNOW_INLINE
569 # define TEMPLATE_PP_3DNOW 1
570 # include "postprocess_template.c"
571 # elif HAVE_MMX_INLINE
572 # define TEMPLATE_PP_MMX 1
573 # include "postprocess_template.c"
574 # endif
575 # endif
576 #endif
577 
/**
 * Signature shared by the per-plane implementations (postProcess_C,
 * postProcess_MMX, ...) that postProcess() selects between.
 */
578 typedef void (*pp_fn)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
579  const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2);
580 
/**
 * Post-process one plane with the implementation best suited to this build/CPU.
 *
 * The mode is copied into the context, then a pp_fn is chosen: postProcess_C
 * by default and always when BITEXACT is set in ppMode->lumMode; otherwise
 * either by the runtime CPU flags in c->cpuCaps (CONFIG_RUNTIME_CPUDETECT) or
 * by the compile-time HAVE_* configuration. All arguments except vm are then
 * forwarded unchanged to the chosen function.
 *
 * @param vm mode, used as a PPMode
 * @param vc context, used as a PPContext
 */
581 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
582  const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
583 {
/* the C version is the fallback and the only choice in bitexact mode */
584  pp_fn pp = postProcess_C;
585  PPContext *c= (PPContext *)vc;
586  PPMode *ppMode= (PPMode *)vm;
587  c->ppMode= *ppMode; //FIXME
588 
589  if (!(ppMode->lumMode & BITEXACT)) {
590 #if CONFIG_RUNTIME_CPUDETECT
591 #if ARCH_X86 && HAVE_INLINE_ASM
592  // ordered per speed fastest first
593  if (c->cpuCaps & AV_CPU_FLAG_SSE2) pp = postProcess_SSE2;
594  else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT) pp = postProcess_MMX2;
595  else if (c->cpuCaps & AV_CPU_FLAG_3DNOW) pp = postProcess_3DNow;
596  else if (c->cpuCaps & AV_CPU_FLAG_MMX) pp = postProcess_MMX;
597 #elif HAVE_ALTIVEC
598  if (c->cpuCaps & AV_CPU_FLAG_ALTIVEC) pp = postProcess_altivec;
599 #endif
600 #else /* CONFIG_RUNTIME_CPUDETECT */
/* without runtime detection exactly one optimized version was compiled in */
601 #if HAVE_SSE2_INLINE
602  pp = postProcess_SSE2;
603 #elif HAVE_MMXEXT_INLINE
604  pp = postProcess_MMX2;
605 #elif HAVE_AMD3DNOW_INLINE
606  pp = postProcess_3DNow;
607 #elif HAVE_MMX_INLINE
608  pp = postProcess_MMX;
609 #elif HAVE_ALTIVEC
610  pp = postProcess_altivec;
611 #endif
612 #endif /* !CONFIG_RUNTIME_CPUDETECT */
613  }
614 
615  pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
616 }
617 
/* -pp Command line Help
 *
 * Printed line by line when the mode name "help" is requested.
 * The long names listed here must match the ones in filters[], because the
 * parser compares names with a case-sensitive strcmp(): the entries for
 * ha/va/fq now read "ahdeblock", "avdeblock" and "forcequant" (they used to
 * advertise "hadeblock", "vadeblock" and "forceQuant", which are rejected).
 */
const char pp_help[] =
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
663 
664 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
665 {
667  char *p= temp;
668  static const char filterDelimiters[] = ",/";
669  static const char optionDelimiters[] = ":|";
670  struct PPMode *ppMode;
671  char *filterToken;
672 
673  if (!name) {
674  av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n");
675  return NULL;
676  }
677 
678  if (!strcmp(name, "help")) {
679  const char *p;
680  for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
681  av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
682  av_log(NULL, AV_LOG_INFO, "%s", temp);
683  }
684  return NULL;
685  }
686 
687  ppMode= av_malloc(sizeof(PPMode));
688 
689  ppMode->lumMode= 0;
690  ppMode->chromMode= 0;
691  ppMode->maxTmpNoise[0]= 700;
692  ppMode->maxTmpNoise[1]= 1500;
693  ppMode->maxTmpNoise[2]= 3000;
694  ppMode->maxAllowedY= 234;
695  ppMode->minAllowedY= 16;
696  ppMode->baseDcDiff= 256/8;
697  ppMode->flatnessThreshold= 56-16-1;
698  ppMode->maxClippedThreshold= 0.01;
699  ppMode->error=0;
700 
701  memset(temp, 0, GET_MODE_BUFFER_SIZE);
702  av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
703 
704  av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
705 
706  for(;;){
707  char *filterName;
708  int q= 1000000; //PP_QUALITY_MAX;
709  int chrom=-1;
710  int luma=-1;
711  char *option;
713  int i;
714  int filterNameOk=0;
715  int numOfUnknownOptions=0;
716  int enable=1; //does the user want us to enabled or disabled the filter
717 
718  filterToken= strtok(p, filterDelimiters);
719  if(filterToken == NULL) break;
720  p+= strlen(filterToken) + 1; // p points to next filterToken
721  filterName= strtok(filterToken, optionDelimiters);
722  if (filterName == NULL) {
723  ppMode->error++;
724  break;
725  }
726  av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
727 
728  if(*filterName == '-'){
729  enable=0;
730  filterName++;
731  }
732 
733  for(;;){ //for all options
734  option= strtok(NULL, optionDelimiters);
735  if(option == NULL) break;
736 
737  av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
738  if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
739  else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
740  else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
741  else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
742  else{
743  options[numOfUnknownOptions] = option;
744  numOfUnknownOptions++;
745  }
746  if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
747  }
748  options[numOfUnknownOptions] = NULL;
749 
750  /* replace stuff from the replace Table */
751  for(i=0; replaceTable[2*i]!=NULL; i++){
752  if(!strcmp(replaceTable[2*i], filterName)){
753  int newlen= strlen(replaceTable[2*i + 1]);
754  int plen;
755  int spaceLeft;
756 
757  p--, *p=',';
758 
759  plen= strlen(p);
760  spaceLeft= p - temp + plen;
761  if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE - 1){
762  ppMode->error++;
763  break;
764  }
765  memmove(p + newlen, p, plen+1);
766  memcpy(p, replaceTable[2*i + 1], newlen);
767  filterNameOk=1;
768  }
769  }
770 
771  for(i=0; filters[i].shortName!=NULL; i++){
772  if( !strcmp(filters[i].longName, filterName)
773  || !strcmp(filters[i].shortName, filterName)){
774  ppMode->lumMode &= ~filters[i].mask;
775  ppMode->chromMode &= ~filters[i].mask;
776 
777  filterNameOk=1;
778  if(!enable) break; // user wants to disable it
779 
780  if(q >= filters[i].minLumQuality && luma)
781  ppMode->lumMode|= filters[i].mask;
782  if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
783  if(q >= filters[i].minChromQuality)
784  ppMode->chromMode|= filters[i].mask;
785 
786  if(filters[i].mask == LEVEL_FIX){
787  int o;
788  ppMode->minAllowedY= 16;
789  ppMode->maxAllowedY= 234;
790  for(o=0; options[o]!=NULL; o++){
791  if( !strcmp(options[o],"fullyrange")
792  ||!strcmp(options[o],"f")){
793  ppMode->minAllowedY= 0;
794  ppMode->maxAllowedY= 255;
795  numOfUnknownOptions--;
796  }
797  }
798  }
799  else if(filters[i].mask == TEMP_NOISE_FILTER)
800  {
801  int o;
802  int numOfNoises=0;
803 
804  for(o=0; options[o]!=NULL; o++){
805  char *tail;
806  ppMode->maxTmpNoise[numOfNoises]=
807  strtol(options[o], &tail, 0);
808  if(tail!=options[o]){
809  numOfNoises++;
810  numOfUnknownOptions--;
811  if(numOfNoises >= 3) break;
812  }
813  }
814  }
815  else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
816  || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
817  int o;
818 
819  for(o=0; options[o]!=NULL && o<2; o++){
820  char *tail;
821  int val= strtol(options[o], &tail, 0);
822  if(tail==options[o]) break;
823 
824  numOfUnknownOptions--;
825  if(o==0) ppMode->baseDcDiff= val;
826  else ppMode->flatnessThreshold= val;
827  }
828  }
829  else if(filters[i].mask == FORCE_QUANT){
830  int o;
831  ppMode->forcedQuant= 15;
832 
833  for(o=0; options[o]!=NULL && o<1; o++){
834  char *tail;
835  int val= strtol(options[o], &tail, 0);
836  if(tail==options[o]) break;
837 
838  numOfUnknownOptions--;
839  ppMode->forcedQuant= val;
840  }
841  }
842  }
843  }
844  if(!filterNameOk) ppMode->error++;
845  ppMode->error += numOfUnknownOptions;
846  }
847 
848  av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
849  if(ppMode->error){
850  av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
851  av_free(ppMode);
852  return NULL;
853  }
854  return ppMode;
855 }
856 
858  av_free(mode);
859 }
860 
/**
 * Replace the buffer *p with a new zero-filled buffer of the given size.
 *
 * The old buffer is freed first; its contents are not carried over.
 * The alignment argument is not used by this implementation.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment unused
 * @param size      size passed to av_mallocz() for the new buffer
 */
static void reallocAlign(void **p, int alignment, int size){
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p = fresh;
}
865 
866 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
867  int mbWidth = (width+15)>>4;
868  int mbHeight= (height+15)>>4;
869  int i;
870 
871  c->stride= stride;
872  c->qpStride= qpStride;
873 
874  reallocAlign((void **)&c->tempDst, 8, stride*24+32);
875  reallocAlign((void **)&c->tempSrc, 8, stride*24);
876  reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
877  reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
878  for(i=0; i<256; i++)
879  c->yHistogram[i]= width*height/64*15/256;
880 
881  for(i=0; i<3; i++){
882  //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
883  reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
884  reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
885  }
886 
887  reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
888  reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
889  reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
890  reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
891 }
892 
/**
 * Item-name callback for the log class below.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char * context_to_name(void * ptr) {
    const char *name = "postproc";

    (void)ptr;
    return name;
}
896 
/* Log class for post-processing contexts: "Postproc" plus the
 * context_to_name callback; the third initializer is NULL.
 * NOTE(review): initializer order assumed to be class name, item-name
 * callback, options -- confirm against the AVClass declaration. */
897 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
898 
899 pp_context *pp_get_context(int width, int height, int cpuCaps){
900  PPContext *c= av_malloc(sizeof(PPContext));
901  int stride= FFALIGN(width, 16); //assumed / will realloc if needed
902  int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
903 
904  memset(c, 0, sizeof(PPContext));
906  if(cpuCaps&PP_FORMAT){
907  c->hChromaSubSample= cpuCaps&0x3;
908  c->vChromaSubSample= (cpuCaps>>4)&0x3;
909  }else{
910  c->hChromaSubSample= 1;
911  c->vChromaSubSample= 1;
912  }
913  if (cpuCaps & PP_CPU_CAPS_AUTO) {
914  c->cpuCaps = av_get_cpu_flags();
915  } else {
916  c->cpuCaps = 0;
917  if (cpuCaps & PP_CPU_CAPS_MMX) c->cpuCaps |= AV_CPU_FLAG_MMX;
918  if (cpuCaps & PP_CPU_CAPS_MMX2) c->cpuCaps |= AV_CPU_FLAG_MMXEXT;
919  if (cpuCaps & PP_CPU_CAPS_3DNOW) c->cpuCaps |= AV_CPU_FLAG_3DNOW;
920  if (cpuCaps & PP_CPU_CAPS_ALTIVEC) c->cpuCaps |= AV_CPU_FLAG_ALTIVEC;
921  }
922 
923  reallocBuffers(c, width, height, stride, qpStride);
924 
925  c->frameNum=-1;
926 
927  return c;
928 }
929 
930 void pp_free_context(void *vc){
931  PPContext *c = (PPContext*)vc;
932  int i;
933 
934  for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
935  for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
936 
937  av_free(c->tempBlocks);
938  av_free(c->yHistogram);
939  av_free(c->tempDst);
940  av_free(c->tempSrc);
941  av_free(c->deintTemp);
942  av_free(c->stdQPTable);
943  av_free(c->nonBQPTable);
945 
946  memset(c, 0, sizeof(PPContext));
947 
948  av_free(c);
949 }
950 
/**
 * Post-process a picture made of three planes.
 *
 * Prepares the QP tables, filters plane 0 with postProcess() and then
 * handles planes 1 and 2: they are filtered as well when mode->chromMode is
 * non-zero, otherwise they are copied from src to dst unchanged.
 *
 * @param src       source planes
 * @param srcStride line sizes of the source planes
 * @param dst       destination planes
 * @param dstStride line sizes of the destination planes
 * @param width     width of plane 0
 * @param height    height of plane 0
 * @param QP_store  per-macroblock QP table; may be NULL, in which case a
 *                  table filled with 1 (or with mode->forcedQuant when
 *                  FORCE_QUANT is set) is used instead
 * @param QPStride  line size of QP_store; may be negative
 * @param vm        mode, used as a PPMode
 * @param vc        context, used as a PPContext
 * @param pict_type picture type, optionally combined with PP_PICT_TYPE_QP2
 */
951 void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
952  uint8_t * dst[3], const int dstStride[3],
953  int width, int height,
954  const QP_STORE_T *QP_store, int QPStride,
955  pp_mode *vm, void *vc, int pict_type)
956 {
957  int mbWidth = (width+15)>>4;
958  int mbHeight= (height+15)>>4;
959  PPMode *mode = (PPMode*)vm;
960  PPContext *c = (PPContext*)vc;
961  int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
962  int absQPStride = FFABS(QPStride);
963 
964  // c->stride and c->QPStride are always positive
/* grow the work buffers when this call needs larger strides than before */
965  if(c->stride < minStride || c->qpStride < absQPStride)
966  reallocBuffers(c, width, height,
967  FFMAX(minStride, c->stride),
968  FFMAX(c->qpStride, absQPStride));
969 
/* no QP table given, or quantizer forced: use a single row (stride 0) */
970  if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
971  int i;
972  QP_store= c->forcedQPTable;
973  absQPStride = QPStride = 0;
974  if(mode->lumMode & FORCE_QUANT)
975  for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
976  else
977  for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
978  }
979 
/* PP_PICT_TYPE_QP2: store halved QP values in stdQPTable, four entries at a
 * time through 32-bit accesses, then the remaining ones individually */
980  if(pict_type & PP_PICT_TYPE_QP2){
981  int i;
982  const int count= mbHeight * absQPStride;
983  for(i=0; i<(count>>2); i++){
984  ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
985  }
986  for(i<<=2; i<count; i++){
987  c->stdQPTable[i] = QP_store[i]>>1;
988  }
989  QP_store= c->stdQPTable;
990  QPStride= absQPStride;
991  }
992 
/* disabled debug dump of the QP table */
993  if(0){
994  int x,y;
995  for(y=0; y<mbHeight; y++){
996  for(x=0; x<mbWidth; x++){
997  av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
998  }
999  av_log(c, AV_LOG_INFO, "\n");
1000  }
1001  av_log(c, AV_LOG_INFO, "\n");
1002  }
1003 
/* refresh nonBQPTable (QP values masked to 6 bits) unless the picture type is 3
 * NOTE(review): type 3 presumably denotes B-frames, given the table name -- confirm */
1004  if((pict_type&7)!=3){
1005  if (QPStride >= 0){
1006  int i;
1007  const int count= mbHeight * QPStride;
1008  for(i=0; i<(count>>2); i++){
1009  ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1010  }
1011  for(i<<=2; i<count; i++){
1012  c->nonBQPTable[i] = QP_store[i] & 0x3F;
1013  }
1014  } else {
/* negative stride: copy row by row into a table laid out with the positive stride */
1015  int i,j;
1016  for(i=0; i<mbHeight; i++) {
1017  for(j=0; j<absQPStride; j++) {
1018  c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1019  }
1020  }
1021  }
1022  }
1023 
1024  av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1025  mode->lumMode, mode->chromMode);
1026 
1027  postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1028  width, height, QP_store, QPStride, 0, mode, c);
1029 
/* planes 1 and 2 use the dimensions reduced by the subsampling shifts */
1030  width = (width )>>c->hChromaSubSample;
1031  height = (height)>>c->vChromaSubSample;
1032 
1033  if(mode->chromMode){
1034  postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1035  width, height, QP_store, QPStride, 1, mode, c);
1036  postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1037  width, height, QP_store, QPStride, 2, mode, c);
1038  }
/* no chroma filtering: copy the planes, in one call each when the strides match */
1039  else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1040  linecpy(dst[1], src[1], height, srcStride[1]);
1041  linecpy(dst[2], src[2], height, srcStride[2]);
1042  }else{
1043  int y;
1044  for(y=0; y<height; y++){
1045  memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1046  memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1047  }
1048  }
1049 }