FFmpeg: libpostproc/postprocess.c Source File

00001 /*
00002  * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
00003  *
00004  * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
00005  *
00006  * This file is part of FFmpeg.
00007  *
00008  * FFmpeg is free software; you can redistribute it and/or modify
00009  * it under the terms of the GNU General Public License as published by
00010  * the Free Software Foundation; either version 2 of the License, or
00011  * (at your option) any later version.
00012  *
00013  * FFmpeg is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016  * GNU General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU General Public License
00019  * along with FFmpeg; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00021  */
00022 
00028 /*
00029                         C       MMX     MMX2    3DNow   AltiVec
00030 isVertDC                Ec      Ec                      Ec
00031 isVertMinMaxOk          Ec      Ec                      Ec
00032 doVertLowPass           E               e       e       Ec
00033 doVertDefFilter         Ec      Ec      e       e       Ec
00034 isHorizDC               Ec      Ec                      Ec
00035 isHorizMinMaxOk         a       E                       Ec
00036 doHorizLowPass          E               e       e       Ec
00037 doHorizDefFilter        Ec      Ec      e       e       Ec
00038 do_a_deblock            Ec      E       Ec      E
00039 deRing                  E               e       e*      Ecp
00040 Vertical RKAlgo1        E               a       a
00041 Horizontal RKAlgo1                      a       a
00042 Vertical X1#            a               E       E
00043 Horizontal X1#          a               E       E
00044 LinIpolDeinterlace      e               E       E*
00045 CubicIpolDeinterlace    a               e       e*
00046 LinBlendDeinterlace     e               E       E*
00047 MedianDeinterlace#      E       Ec      Ec
00048 TempDeNoiser#           E               e       e       Ec
00049 
00050 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
00051 # more or less selfinvented filters so the exactness is not too meaningful
00052 E = Exact implementation
00053 e = almost exact implementation (slightly different rounding,...)
00054 a = alternative / approximate impl
00055 c = checked against the other implementations (-vo md5)
00056 p = partially optimized, still some work to do
00057 */
00058 
00059 /*
00060 TODO:
00061 reduce the time wasted on the mem transfer
00062 unroll stuff if instructions depend too much on the prior one
00063 move YScale thing to the end instead of fixing QP
00064 write a faster and higher quality deblocking filter :)
00065 make the mainloop more flexible (variable number of blocks at once);
00066         the if/else stuff per block is slowing things down
00067 compare the quality & speed of all filters
00068 split this huge file
00069 optimize c versions
00070 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
00071 ...
00072 */
00073 
00074 //Changelog: use git log
00075 
00076 #include "config.h"
00077 #include "libavutil/avutil.h"
00078 #include "libavutil/avassert.h"
00079 #include <inttypes.h>
00080 #include <stdio.h>
00081 #include <stdlib.h>
00082 #include <string.h>
00083 //#undef HAVE_MMXEXT_INLINE
00084 //#define HAVE_AMD3DNOW_INLINE
00085 //#undef HAVE_MMX_INLINE
00086 //#undef ARCH_X86
00087 //#define DEBUG_BRIGHTNESS
00088 #include "postprocess.h"
00089 #include "postprocess_internal.h"
00090 #include "libavutil/avstring.h"
00091 
00092 unsigned postproc_version(void)
00093 {
00094     av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);
00095     return LIBPOSTPROC_VERSION_INT;
00096 }
00097 
00098 const char *postproc_configuration(void)
00099 {
00100     return FFMPEG_CONFIGURATION;
00101 }
00102 
00103 const char *postproc_license(void)
00104 {
00105 #define LICENSE_PREFIX "libpostproc license: "
00106     return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
00107 }
00108 
00109 #if HAVE_ALTIVEC_H
00110 #include <altivec.h>
00111 #endif
00112 
/* Size in bytes of the scratch buffer that holds the working copy of the
 * mode string in pp_get_mode_by_name_and_quality(). */
#define GET_MODE_BUFFER_SIZE 500
/* Capacity of the per-filter array that collects unrecognized options. */
#define OPTIONS_ARRAY_SIZE 10
/* Number of rows/columns the C block filters in this file iterate over. */
#define BLOCK_SIZE 8
/* Not referenced in this part of the file; presumably used by the included
 * templates -- TODO confirm. */
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
00118 
#if ARCH_X86 && HAVE_INLINE_ASM
/* 64-bit constants for the x86 inline-asm code paths.
 *   wNN: the 16-bit value 0x00NN repeated in each of the four words
 *   bNN: the byte value 0xNN repeated in each of the eight bytes
 * The leading 8 passed to DECLARE_ASM_CONST is presumably the alignment --
 * confirm against the macro definition. */
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
#endif

/* Declared on every architecture. Not referenced in this part of the file;
 * presumably consumed by the deringing filter in the templates -- TODO confirm. */
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
00131 
00132 
/* Table of all filters the mode-string parser recognizes.
 *
 * pp_get_mode_by_name_and_quality() matches a filter name against both
 * strings of an entry and then uses its chromDefault, minLumQuality,
 * minChromQuality and mask members. The initializer columns are presumably
 * in the order
 *     shortName, longName, chromDefault, minLumQuality, minChromQuality, mask
 * -- confirm against the declaration of struct PPFilter.
 * The list is terminated by an entry whose shortName is NULL. */
static const struct PPFilter filters[]=
{
    {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
    {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
/*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
    {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
    {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
    {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
    {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
    {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
    {"dr", "dering",                1, 5, 6, DERING},
    {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
    {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
    {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
    {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
    {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
    {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
    {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
    {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
    {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
    {"be", "bitexact",              1, 0, 0, BITEXACT},
    {NULL, NULL,0,0,0,0} //End Marker
};
00156 
/* Alias table used by the mode-string parser.
 *
 * Entries come in pairs: replaceTable[2*i] is an alias name and
 * replaceTable[2*i + 1] is the filter list that is spliced into the mode
 * string when a filter token equals that alias. A NULL alias ends the table. */
static const char *replaceTable[]=
{
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",
    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",
    "ac",           "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};
00166 
00167 
00168 #if ARCH_X86 && HAVE_INLINE_ASM
/* Issue the x86 `prefetchnta` instruction for the address p.
 * The instruction is a cache hint only; the statement has no outputs. */
static inline void prefetchnta(void *p)
{
    __asm__ volatile(   "prefetchnta (%0)\n\t"
        : : "r" (p)
    );
}
00175 
/* Issue the x86 `prefetcht0` instruction for the address p.
 * The instruction is a cache hint only; the statement has no outputs. */
static inline void prefetcht0(void *p)
{
    __asm__ volatile(   "prefetcht0 (%0)\n\t"
        : : "r" (p)
    );
}
00182 
/* Issue the x86 `prefetcht1` instruction for the address p.
 * The instruction is a cache hint only; the statement has no outputs. */
static inline void prefetcht1(void *p)
{
    __asm__ volatile(   "prefetcht1 (%0)\n\t"
        : : "r" (p)
    );
}
00189 
/* Issue the x86 `prefetcht2` instruction for the address p.
 * The instruction is a cache hint only; the statement has no outputs. */
static inline void prefetcht2(void *p)
{
    __asm__ volatile(   "prefetcht2 (%0)\n\t"
        : : "r" (p)
    );
}
00196 #endif
00197 
00198 /* The horizontal functions exist only in C because the MMX
00199  * code is faster with vertical filters and transposing. */
00200 
00204 static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c)
00205 {
00206     int numEq= 0;
00207     int y;
00208     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00209     const int dcThreshold= dcOffset*2 + 1;
00210 
00211     for(y=0; y<BLOCK_SIZE; y++){
00212         if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
00213         if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
00214         if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
00215         if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
00216         if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
00217         if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
00218         if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
00219         src+= stride;
00220     }
00221     return numEq > c->ppMode.flatnessThreshold;
00222 }
00223 
00227 static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c)
00228 {
00229     int numEq= 0;
00230     int y;
00231     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00232     const int dcThreshold= dcOffset*2 + 1;
00233 
00234     src+= stride*4; // src points to begin of the 8x8 Block
00235     for(y=0; y<BLOCK_SIZE-1; y++){
00236         if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
00237         if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
00238         if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
00239         if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
00240         if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
00241         if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
00242         if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
00243         if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
00244         src+= stride;
00245     }
00246     return numEq > c->ppMode.flatnessThreshold;
00247 }
00248 
/**
 * Sparse range check over eight rows.
 *
 * One pixel pair is sampled per row; the sampled columns cycle through
 * (0,5), (2,7), (4,1), (6,3). The check fails as soon as a sampled
 * difference falls outside [-2*QP, +2*QP].
 *
 * @param src    first pixel of the first row
 * @param stride distance in bytes between consecutive rows
 * @param QP     quantizer that scales the allowed difference
 * @return 1 if every sampled pair is within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
{
    static const int colA[4] = { 0, 2, 4, 6 };
    static const int colB[4] = { 5, 7, 1, 3 };
    int row;

    for (row = 0; row < 8; row++) {
        const int k = row & 3;

        /* the unsigned cast folds the two-sided range check into one compare */
        if ((unsigned)(src[colA[k]] - src[colB[k]] + 2*QP) > 4*QP)
            return 0;
        src += stride;
    }
    return 1;
}
00264 
00265 static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP)
00266 {
00267     int x;
00268     src+= stride*4;
00269     for(x=0; x<BLOCK_SIZE; x+=4){
00270         if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
00271         if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
00272         if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
00273         if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
00274     }
00275     return 1;
00276 }
00277 
00278 static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c)
00279 {
00280     if( isHorizDC_C(src, stride, c) ){
00281         if( isHorizMinMaxOk_C(src, stride, c->QP) )
00282             return 1;
00283         else
00284             return 0;
00285     }else{
00286         return 2;
00287     }
00288 }
00289 
00290 static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c)
00291 {
00292     if( isVertDC_C(src, stride, c) ){
00293         if( isVertMinMaxOk_C(src, stride, c->QP) )
00294             return 1;
00295         else
00296             return 0;
00297     }else{
00298         return 2;
00299     }
00300 }
00301 
00302 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c)
00303 {
00304     int y;
00305     for(y=0; y<BLOCK_SIZE; y++){
00306         const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
00307 
00308         if(FFABS(middleEnergy) < 8*c->QP){
00309             const int q=(dst[3] - dst[4])/2;
00310             const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
00311             const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
00312 
00313             int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00314             d= FFMAX(d, 0);
00315 
00316             d= (5*d + 32) >> 6;
00317             d*= FFSIGN(-middleEnergy);
00318 
00319             if(q>0)
00320             {
00321                 d= d<0 ? 0 : d;
00322                 d= d>q ? q : d;
00323             }
00324             else
00325             {
00326                 d= d>0 ? 0 : d;
00327                 d= d<q ? q : d;
00328             }
00329 
00330             dst[3]-= d;
00331             dst[4]+= d;
00332         }
00333         dst+= stride;
00334     }
00335 }
00336 
00341 static inline void doHorizLowPass_C(uint8_t dst[], int stride, const PPContext *c)
00342 {
00343     int y;
00344     for(y=0; y<BLOCK_SIZE; y++){
00345         const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
00346         const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
00347 
00348         int sums[10];
00349         sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
00350         sums[1] = sums[0] - first  + dst[3];
00351         sums[2] = sums[1] - first  + dst[4];
00352         sums[3] = sums[2] - first  + dst[5];
00353         sums[4] = sums[3] - first  + dst[6];
00354         sums[5] = sums[4] - dst[0] + dst[7];
00355         sums[6] = sums[5] - dst[1] + last;
00356         sums[7] = sums[6] - dst[2] + last;
00357         sums[8] = sums[7] - dst[3] + last;
00358         sums[9] = sums[8] - dst[4] + last;
00359 
00360         dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
00361         dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
00362         dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
00363         dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
00364         dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
00365         dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
00366         dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
00367         dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
00368 
00369         dst+= stride;
00370     }
00371 }
00372 
00381 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
00382 {
00383     int y;
00384     static uint64_t *lut= NULL;
00385     if(lut==NULL)
00386     {
00387         int i;
00388         lut = av_malloc(256*8);
00389         for(i=0; i<256; i++)
00390         {
00391             int v= i < 128 ? 2*i : 2*(i-256);
00392 /*
00393 //Simulate 112242211 9-Tap filter
00394             uint64_t a= (v/16)  & 0xFF;
00395             uint64_t b= (v/8)   & 0xFF;
00396             uint64_t c= (v/4)   & 0xFF;
00397             uint64_t d= (3*v/8) & 0xFF;
00398 */
00399 //Simulate piecewise linear interpolation
00400             uint64_t a= (v/16)   & 0xFF;
00401             uint64_t b= (v*3/16) & 0xFF;
00402             uint64_t c= (v*5/16) & 0xFF;
00403             uint64_t d= (7*v/16) & 0xFF;
00404             uint64_t A= (0x100 - a)&0xFF;
00405             uint64_t B= (0x100 - b)&0xFF;
00406             uint64_t C= (0x100 - c)&0xFF;
00407             uint64_t D= (0x100 - c)&0xFF;
00408 
00409             lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
00410                        (D<<24) | (C<<16) | (B<<8)  | (A);
00411             //lut[i] = (v<<32) | (v<<24);
00412         }
00413     }
00414 
00415     for(y=0; y<BLOCK_SIZE; y++){
00416         int a= src[1] - src[2];
00417         int b= src[3] - src[4];
00418         int c= src[5] - src[6];
00419 
00420         int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
00421 
00422         if(d < QP){
00423             int v = d * FFSIGN(-b);
00424 
00425             src[1] +=v/8;
00426             src[2] +=v/4;
00427             src[3] +=3*v/8;
00428             src[4] -=3*v/8;
00429             src[5] -=v/4;
00430             src[6] -=v/8;
00431         }
00432         src+=stride;
00433     }
00434 }
00435 
00439 static av_always_inline void do_a_deblock_C(uint8_t *src, int step,
00440                                             int stride, const PPContext *c)
00441 {
00442     int y;
00443     const int QP= c->QP;
00444     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00445     const int dcThreshold= dcOffset*2 + 1;
00446 //START_TIMER
00447     src+= step*4; // src points to begin of the 8x8 Block
00448     for(y=0; y<8; y++){
00449         int numEq= 0;
00450 
00451         if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
00452         if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
00453         if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
00454         if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
00455         if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
00456         if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
00457         if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
00458         if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
00459         if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
00460         if(numEq > c->ppMode.flatnessThreshold){
00461             int min, max, x;
00462 
00463             if(src[0] > src[step]){
00464                 max= src[0];
00465                 min= src[step];
00466             }else{
00467                 max= src[step];
00468                 min= src[0];
00469             }
00470             for(x=2; x<8; x+=2){
00471                 if(src[x*step] > src[(x+1)*step]){
00472                         if(src[x    *step] > max) max= src[ x   *step];
00473                         if(src[(x+1)*step] < min) min= src[(x+1)*step];
00474                 }else{
00475                         if(src[(x+1)*step] > max) max= src[(x+1)*step];
00476                         if(src[ x   *step] < min) min= src[ x   *step];
00477                 }
00478             }
00479             if(max-min < 2*QP){
00480                 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
00481                 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
00482 
00483                 int sums[10];
00484                 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
00485                 sums[1] = sums[0] - first       + src[3*step];
00486                 sums[2] = sums[1] - first       + src[4*step];
00487                 sums[3] = sums[2] - first       + src[5*step];
00488                 sums[4] = sums[3] - first       + src[6*step];
00489                 sums[5] = sums[4] - src[0*step] + src[7*step];
00490                 sums[6] = sums[5] - src[1*step] + last;
00491                 sums[7] = sums[6] - src[2*step] + last;
00492                 sums[8] = sums[7] - src[3*step] + last;
00493                 sums[9] = sums[8] - src[4*step] + last;
00494 
00495                 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
00496                 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
00497                 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
00498                 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
00499                 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
00500                 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
00501                 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
00502                 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
00503             }
00504         }else{
00505             const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
00506 
00507             if(FFABS(middleEnergy) < 8*QP){
00508                 const int q=(src[3*step] - src[4*step])/2;
00509                 const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
00510                 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
00511 
00512                 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00513                 d= FFMAX(d, 0);
00514 
00515                 d= (5*d + 32) >> 6;
00516                 d*= FFSIGN(-middleEnergy);
00517 
00518                 if(q>0){
00519                     d= d<0 ? 0 : d;
00520                     d= d>q ? q : d;
00521                 }else{
00522                     d= d>0 ? 0 : d;
00523                     d= d<q ? q : d;
00524                 }
00525 
00526                 src[3*step]-= d;
00527                 src[4*step]+= d;
00528             }
00529         }
00530 
00531         src += stride;
00532     }
00533 /*if(step==16){
00534     STOP_TIMER("step16")
00535 }else{
00536     STOP_TIMER("stepX")
00537 }*/
00538 }
00539 
00540 //Note: we have C, MMX, MMX2 and 3DNOW versions; there is no combined 3DNOW+MMX2 one
00541 //Plain C versions
00542 //we always compile C for testing which needs bitexactness
00543 #define TEMPLATE_PP_C 1
00544 #include "postprocess_template.c"
00545 
00546 #if HAVE_ALTIVEC
00547 #   define TEMPLATE_PP_ALTIVEC 1
00548 #   include "postprocess_altivec_template.c"
00549 #   include "postprocess_template.c"
00550 #endif
00551 
00552 #if ARCH_X86 && HAVE_INLINE_ASM
00553 #    if CONFIG_RUNTIME_CPUDETECT
00554 #        define TEMPLATE_PP_MMX 1
00555 #        include "postprocess_template.c"
00556 #        define TEMPLATE_PP_MMXEXT 1
00557 #        include "postprocess_template.c"
00558 #        define TEMPLATE_PP_3DNOW 1
00559 #        include "postprocess_template.c"
00560 #        define TEMPLATE_PP_SSE2 1
00561 #        include "postprocess_template.c"
00562 #    else
00563 #        if HAVE_SSE2_INLINE
00564 #            define TEMPLATE_PP_SSE2 1
00565 #            include "postprocess_template.c"
00566 #        elif HAVE_MMXEXT_INLINE
00567 #            define TEMPLATE_PP_MMXEXT 1
00568 #            include "postprocess_template.c"
00569 #        elif HAVE_AMD3DNOW_INLINE
00570 #            define TEMPLATE_PP_3DNOW 1
00571 #            include "postprocess_template.c"
00572 #        elif HAVE_MMX_INLINE
00573 #            define TEMPLATE_PP_MMX 1
00574 #            include "postprocess_template.c"
00575 #        endif
00576 #    endif
00577 #endif
00578 
/* Common signature of the postProcess_* implementations (C, MMX, SSE2, ...)
 * so that postProcess() can select one and call it through a pointer. */
typedef void (*pp_fn)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
                      const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2);
00581 
/**
 * Run one plane through the postprocessing implementation that fits the
 * build configuration and, with runtime CPU detection, the CPU flags in
 * the context.
 *
 * The mode pointed to by vm is copied into the context before the call.
 * postProcess_C is the default; it is also what is used whenever the
 * BITEXACT flag is set in the mode's lumMode.
 *
 * @param vm  mode, cast to PPMode
 * @param vc  context, cast to PPContext
 * The remaining arguments are forwarded unchanged to the selected
 * implementation.
 */
static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
        const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
{
    pp_fn pp = postProcess_C;
    PPContext *c= (PPContext *)vc;
    PPMode *ppMode= (PPMode *)vm;
    c->ppMode= *ppMode; //FIXME

    /* an optimized implementation may replace the C one unless bit-exact output was requested */
    if (!(ppMode->lumMode & BITEXACT)) {
#if CONFIG_RUNTIME_CPUDETECT
#if ARCH_X86 && HAVE_INLINE_ASM
        // ordered per speed fastest first
        if      (c->cpuCaps & AV_CPU_FLAG_SSE2)     pp = postProcess_SSE2;
        else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT)   pp = postProcess_MMX2;
        else if (c->cpuCaps & AV_CPU_FLAG_3DNOW)    pp = postProcess_3DNow;
        else if (c->cpuCaps & AV_CPU_FLAG_MMX)      pp = postProcess_MMX;
#elif HAVE_ALTIVEC
        if      (c->cpuCaps & AV_CPU_FLAG_ALTIVEC)  pp = postProcess_altivec;
#endif
#else /* CONFIG_RUNTIME_CPUDETECT */
        /* without runtime detection the choice is fixed at compile time */
#if     HAVE_SSE2_INLINE
        pp = postProcess_SSE2;
#elif   HAVE_MMXEXT_INLINE
        pp = postProcess_MMX2;
#elif HAVE_AMD3DNOW_INLINE
        pp = postProcess_3DNow;
#elif HAVE_MMX_INLINE
        pp = postProcess_MMX;
#elif HAVE_ALTIVEC
        pp = postProcess_altivec;
#endif
#endif /* !CONFIG_RUNTIME_CPUDETECT */
    }

    pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
}
00618 
/* -pp command line help.
 *
 * Printed line by line when the mode name "help" is requested. Every
 * filter name listed here must be spelled exactly as in filters[] and
 * replaceTable[], because the parser matches names with strcmp().
 * The declaration form depends on the library version, presumably for ABI
 * compatibility with older releases.
 */
#if LIBPOSTPROC_VERSION_INT < (52<<16)
const char *const pp_help=
#else
const char pp_help[] =
#endif
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     ahdeblock       (2 threshold)           horizontal deblocking filter\n"
"va     avdeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forcequant      <quantizer>             force quantizer\n"
"be     bitexact                                use only the bitexact C code\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
00668 
00669 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
00670 {
00671     char temp[GET_MODE_BUFFER_SIZE];
00672     char *p= temp;
00673     static const char filterDelimiters[] = ",/";
00674     static const char optionDelimiters[] = ":";
00675     struct PPMode *ppMode;
00676     char *filterToken;
00677 
00678     if (!name)  {
00679         av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n");
00680         return NULL;
00681     }
00682 
00683     if (!strcmp(name, "help")) {
00684         const char *p;
00685         for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
00686             av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
00687             av_log(NULL, AV_LOG_INFO, "%s", temp);
00688         }
00689         return NULL;
00690     }
00691 
00692     ppMode= av_malloc(sizeof(PPMode));
00693 
00694     ppMode->lumMode= 0;
00695     ppMode->chromMode= 0;
00696     ppMode->maxTmpNoise[0]= 700;
00697     ppMode->maxTmpNoise[1]= 1500;
00698     ppMode->maxTmpNoise[2]= 3000;
00699     ppMode->maxAllowedY= 234;
00700     ppMode->minAllowedY= 16;
00701     ppMode->baseDcDiff= 256/8;
00702     ppMode->flatnessThreshold= 56-16-1;
00703     ppMode->maxClippedThreshold= 0.01;
00704     ppMode->error=0;
00705 
00706     memset(temp, 0, GET_MODE_BUFFER_SIZE);
00707     av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
00708 
00709     av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
00710 
00711     for(;;){
00712         char *filterName;
00713         int q= 1000000; //PP_QUALITY_MAX;
00714         int chrom=-1;
00715         int luma=-1;
00716         char *option;
00717         char *options[OPTIONS_ARRAY_SIZE];
00718         int i;
00719         int filterNameOk=0;
00720         int numOfUnknownOptions=0;
00721         int enable=1; //does the user want us to enabled or disabled the filter
00722 
00723         filterToken= strtok(p, filterDelimiters);
00724         if(filterToken == NULL) break;
00725         p+= strlen(filterToken) + 1; // p points to next filterToken
00726         filterName= strtok(filterToken, optionDelimiters);
00727         av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
00728 
00729         if(*filterName == '-'){
00730             enable=0;
00731             filterName++;
00732         }
00733 
00734         for(;;){ //for all options
00735             option= strtok(NULL, optionDelimiters);
00736             if(option == NULL) break;
00737 
00738             av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
00739             if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
00740             else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
00741             else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
00742             else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
00743             else{
00744                 options[numOfUnknownOptions] = option;
00745                 numOfUnknownOptions++;
00746             }
00747             if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
00748         }
00749         options[numOfUnknownOptions] = NULL;
00750 
00751         /* replace stuff from the replace Table */
00752         for(i=0; replaceTable[2*i]!=NULL; i++){
00753             if(!strcmp(replaceTable[2*i], filterName)){
00754                 int newlen= strlen(replaceTable[2*i + 1]);
00755                 int plen;
00756                 int spaceLeft;
00757 
00758                 p--, *p=',';
00759 
00760                 plen= strlen(p);
00761                 spaceLeft= p - temp + plen;
00762                 if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE - 1){
00763                     ppMode->error++;
00764                     break;
00765                 }
00766                 memmove(p + newlen, p, plen+1);
00767                 memcpy(p, replaceTable[2*i + 1], newlen);
00768                 filterNameOk=1;
00769             }
00770         }
00771 
00772         for(i=0; filters[i].shortName!=NULL; i++){
00773             if(   !strcmp(filters[i].longName, filterName)
00774                || !strcmp(filters[i].shortName, filterName)){
00775                 ppMode->lumMode &= ~filters[i].mask;
00776                 ppMode->chromMode &= ~filters[i].mask;
00777 
00778                 filterNameOk=1;
00779                 if(!enable) break; // user wants to disable it
00780 
00781                 if(q >= filters[i].minLumQuality && luma)
00782                     ppMode->lumMode|= filters[i].mask;
00783                 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
00784                     if(q >= filters[i].minChromQuality)
00785                             ppMode->chromMode|= filters[i].mask;
00786 
00787                 if(filters[i].mask == LEVEL_FIX){
00788                     int o;
00789                     ppMode->minAllowedY= 16;
00790                     ppMode->maxAllowedY= 234;
00791                     for(o=0; options[o]!=NULL; o++){
00792                         if(  !strcmp(options[o],"fullyrange")
00793                            ||!strcmp(options[o],"f")){
00794                             ppMode->minAllowedY= 0;
00795                             ppMode->maxAllowedY= 255;
00796                             numOfUnknownOptions--;
00797                         }
00798                     }
00799                 }
00800                 else if(filters[i].mask == TEMP_NOISE_FILTER)
00801                 {
00802                     int o;
00803                     int numOfNoises=0;
00804 
00805                     for(o=0; options[o]!=NULL; o++){
00806                         char *tail;
00807                         ppMode->maxTmpNoise[numOfNoises]=
00808                             strtol(options[o], &tail, 0);
00809                         if(tail!=options[o]){
00810                             numOfNoises++;
00811                             numOfUnknownOptions--;
00812                             if(numOfNoises >= 3) break;
00813                         }
00814                     }
00815                 }
00816                 else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
00817                      || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
00818                     int o;
00819 
00820                     for(o=0; options[o]!=NULL && o<2; o++){
00821                         char *tail;
00822                         int val= strtol(options[o], &tail, 0);
00823                         if(tail==options[o]) break;
00824 
00825                         numOfUnknownOptions--;
00826                         if(o==0) ppMode->baseDcDiff= val;
00827                         else ppMode->flatnessThreshold= val;
00828                     }
00829                 }
00830                 else if(filters[i].mask == FORCE_QUANT){
00831                     int o;
00832                     ppMode->forcedQuant= 15;
00833 
00834                     for(o=0; options[o]!=NULL && o<1; o++){
00835                         char *tail;
00836                         int val= strtol(options[o], &tail, 0);
00837                         if(tail==options[o]) break;
00838 
00839                         numOfUnknownOptions--;
00840                         ppMode->forcedQuant= val;
00841                     }
00842                 }
00843             }
00844         }
00845         if(!filterNameOk) ppMode->error++;
00846         ppMode->error += numOfUnknownOptions;
00847     }
00848 
00849     av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
00850     if(ppMode->error){
00851         av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
00852         av_free(ppMode);
00853         return NULL;
00854     }
00855     return ppMode;
00856 }
00857 
00858 void pp_free_mode(pp_mode *mode){
00859     av_free(mode);
00860 }
00861 
/**
 * Replace the buffer referenced by *p with a freshly zeroed one.
 *
 * The old buffer is released first and its contents are NOT preserved.
 * *p receives whatever av_mallocz() returns; no failure check is made here.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment currently ignored by this implementation
 * @param size      number of bytes requested for the new buffer
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *zeroed;

    av_free(*p);
    zeroed = av_mallocz(size);
    *p = zeroed;
}
00866 
00867 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
00868     int mbWidth = (width+15)>>4;
00869     int mbHeight= (height+15)>>4;
00870     int i;
00871 
00872     c->stride= stride;
00873     c->qpStride= qpStride;
00874 
00875     reallocAlign((void **)&c->tempDst, 8, stride*24);
00876     reallocAlign((void **)&c->tempSrc, 8, stride*24);
00877     reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
00878     reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
00879     for(i=0; i<256; i++)
00880             c->yHistogram[i]= width*height/64*15/256;
00881 
00882     for(i=0; i<3; i++){
00883         //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
00884         reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
00885         reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
00886     }
00887 
00888     reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
00889     reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00890     reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00891     reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
00892 }
00893 
/**
 * Name callback stored in the logging class below.
 *
 * @param ptr unused
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    const char *name = "postproc";

    (void)ptr;
    return name;
}
00897 
00898 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
00899 
00900 pp_context *pp_get_context(int width, int height, int cpuCaps){
00901     PPContext *c= av_malloc(sizeof(PPContext));
00902     int stride= FFALIGN(width, 16);  //assumed / will realloc if needed
00903     int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
00904 
00905     memset(c, 0, sizeof(PPContext));
00906     c->av_class = &av_codec_context_class;
00907     if(cpuCaps&PP_FORMAT){
00908         c->hChromaSubSample= cpuCaps&0x3;
00909         c->vChromaSubSample= (cpuCaps>>4)&0x3;
00910     }else{
00911         c->hChromaSubSample= 1;
00912         c->vChromaSubSample= 1;
00913     }
00914     if (cpuCaps & PP_CPU_CAPS_AUTO) {
00915         c->cpuCaps = av_get_cpu_flags();
00916     } else {
00917         c->cpuCaps = 0;
00918         if (cpuCaps & PP_CPU_CAPS_MMX)      c->cpuCaps |= AV_CPU_FLAG_MMX;
00919         if (cpuCaps & PP_CPU_CAPS_MMX2)     c->cpuCaps |= AV_CPU_FLAG_MMXEXT;
00920         if (cpuCaps & PP_CPU_CAPS_3DNOW)    c->cpuCaps |= AV_CPU_FLAG_3DNOW;
00921         if (cpuCaps & PP_CPU_CAPS_ALTIVEC)  c->cpuCaps |= AV_CPU_FLAG_ALTIVEC;
00922     }
00923 
00924     reallocBuffers(c, width, height, stride, qpStride);
00925 
00926     c->frameNum=-1;
00927 
00928     return c;
00929 }
00930 
00931 void pp_free_context(void *vc){
00932     PPContext *c = (PPContext*)vc;
00933     int i;
00934 
00935     for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
00936     for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
00937 
00938     av_free(c->tempBlocks);
00939     av_free(c->yHistogram);
00940     av_free(c->tempDst);
00941     av_free(c->tempSrc);
00942     av_free(c->deintTemp);
00943     av_free(c->stdQPTable);
00944     av_free(c->nonBQPTable);
00945     av_free(c->forcedQPTable);
00946 
00947     memset(c, 0, sizeof(PPContext));
00948 
00949     av_free(c);
00950 }
00951 
00952 void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
00953                      uint8_t * dst[3], const int dstStride[3],
00954                      int width, int height,
00955                      const QP_STORE_T *QP_store,  int QPStride,
00956                      pp_mode *vm,  void *vc, int pict_type)
00957 {
00958     int mbWidth = (width+15)>>4;
00959     int mbHeight= (height+15)>>4;
00960     PPMode *mode = (PPMode*)vm;
00961     PPContext *c = (PPContext*)vc;
00962     int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
00963     int absQPStride = FFABS(QPStride);
00964 
00965     // c->stride and c->QPStride are always positive
00966     if(c->stride < minStride || c->qpStride < absQPStride)
00967         reallocBuffers(c, width, height,
00968                        FFMAX(minStride, c->stride),
00969                        FFMAX(c->qpStride, absQPStride));
00970 
00971     if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
00972         int i;
00973         QP_store= c->forcedQPTable;
00974         absQPStride = QPStride = 0;
00975         if(mode->lumMode & FORCE_QUANT)
00976             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
00977         else
00978             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
00979     }
00980 
00981     if(pict_type & PP_PICT_TYPE_QP2){
00982         int i;
00983         const int count= mbHeight * absQPStride;
00984         for(i=0; i<(count>>2); i++){
00985             ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
00986         }
00987         for(i<<=2; i<count; i++){
00988             c->stdQPTable[i] = QP_store[i]>>1;
00989         }
00990         QP_store= c->stdQPTable;
00991         QPStride= absQPStride;
00992     }
00993 
00994     if(0){
00995         int x,y;
00996         for(y=0; y<mbHeight; y++){
00997             for(x=0; x<mbWidth; x++){
00998                 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
00999             }
01000             av_log(c, AV_LOG_INFO, "\n");
01001         }
01002         av_log(c, AV_LOG_INFO, "\n");
01003     }
01004 
01005     if((pict_type&7)!=3){
01006         if (QPStride >= 0){
01007             int i;
01008             const int count= mbHeight * QPStride;
01009             for(i=0; i<(count>>2); i++){
01010                 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
01011             }
01012             for(i<<=2; i<count; i++){
01013                 c->nonBQPTable[i] = QP_store[i] & 0x3F;
01014             }
01015         } else {
01016             int i,j;
01017             for(i=0; i<mbHeight; i++) {
01018                 for(j=0; j<absQPStride; j++) {
01019                     c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
01020                 }
01021             }
01022         }
01023     }
01024 
01025     av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
01026            mode->lumMode, mode->chromMode);
01027 
01028     postProcess(src[0], srcStride[0], dst[0], dstStride[0],
01029                 width, height, QP_store, QPStride, 0, mode, c);
01030 
01031     width  = (width )>>c->hChromaSubSample;
01032     height = (height)>>c->vChromaSubSample;
01033 
01034     if(mode->chromMode){
01035         postProcess(src[1], srcStride[1], dst[1], dstStride[1],
01036                     width, height, QP_store, QPStride, 1, mode, c);
01037         postProcess(src[2], srcStride[2], dst[2], dstStride[2],
01038                     width, height, QP_store, QPStride, 2, mode, c);
01039     }
01040     else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
01041         linecpy(dst[1], src[1], height, srcStride[1]);
01042         linecpy(dst[2], src[2], height, srcStride[2]);
01043     }else{
01044         int y;
01045         for(y=0; y<height; y++){
01046             memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
01047             memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
01048         }
01049     }
01050 }