/* XMIN(a,b) / XMAX(a,b): expand to the smaller / larger of the two arguments.
 * Both are plain conditional-expression macros, so the argument that ends up
 * selected is evaluated twice; do not pass expressions with side effects. */
44 #define XMIN(a,b) ((a) < (b) ? (a) : (b)) 
   45 #define XMAX(a,b) ((a) > (b) ? (a) : (b)) 
   /* Eight rows of eight entries; taken together the 64 values are exactly
    * 0..63, each occurring once.
    * NOTE(review): the declaration these rows belong to is not visible in
    * this listing; presumably this is the table read as dither[y&7][x&7]
    * just before the >>6 in the filter loop -- confirm. */
   49 {  0,  48,  12,  60,   3,  51,  15,  63, },
 
   50 { 32,  16,  44,  28,  35,  19,  47,  31, },
 
   51 {  8,  56,   4,  52,  11,  59,   7,  55, },
 
   52 { 40,  24,  36,  20,  43,  27,  39,  23, },
 
   53 {  2,  50,  14,  62,   1,  49,  13,  61, },
 
   54 { 34,  18,  46,  30,  33,  17,  45,  29, },
 
   55 { 10,  58,   6,  54,   9,  57,   5,  53, },
 
   56 { 42,  26,  38,  22,  41,  25,  37,  21, },
 
   /*
    * dct7_c: combines four integer inputs into four 16-bit outputs.
    *
    * dst   - output array; elements dst[0*step], dst[1*step], dst[2*step]
    *         and dst[3*step] are written.
    * s0-s3 - integer inputs mixed with the C0..C5 weights defined below.
    * step  - distance, in int16_t elements, between consecutive outputs.
    *
    * The locals `s` and `d` used by the body are declared and initialised on
    * lines that are not visible in this listing.
    * NOTE(review): the name and the sqrt(1/7) / sqrt(2/7) annotations suggest
    * a 7-point DCT on pre-folded inputs -- confirm against the full source.
    */
   67 static inline void dct7_c(int16_t *dst, 
int s0, 
int s1, 
int s2, 
int s3, 
int step){
 
   /* Fixed-point weights: each real coefficient is scaled by 1024 and rounded
    * to nearest (+0.5 before the int truncation), which matches the >>10
    * applied to every result in the body. */
   71 #define C0 ((int)(1024*0.37796447300922719759+0.5)) //sqrt(1/7) 
   72 #define C1 ((int)(1024*0.53452248382484879308/6+0.5)) //sqrt(2/7)/6 
   74 #define C2 ((int)(1024*0.45221175985034745004/2+0.5)) 
   75 #define C3 ((int)(1024*0.36264567479870879474/2+0.5)) 
   78 #define C4 ((int)(1024*0.1962505182412941918+0.5)) 
   79 #define C5 ((int)(1024*0.0149276808419397944+0.5)) 
   83     dst[0*step] = ((s + 
s3)*C0 + 512) >> 10;
 
   84     s= (s - 6*
s3)*
C1 + 512;
 
   85     d= (s0-
s2)*
C4 + (s1-s2)*
C5;
 
   86     dst[1*step] = (s + 2*d)>>10;
 
   88     d= (s1-
s0)*
C2 + (s1-s2)*
C3;
 
   89     dst[2*step] = (s + d)>>10;
 
   90     dst[3*step] = (s - d)>>10;
 
   99     dst[1*step]= 2*s3 +   
s2;
 
  100     dst[3*step]=   s3 - 2*
s2;
 
  106             dst2[i*step/2] += 
src[j*step] * cos(i*
M_PI/n*(j+0.5)) * sqrt((i?2.0:1.0)/
n);
 
  107         if(fabs(dst2[i*step/2] - dst[i*step/2]) > 20)
 
  108             printf(
"%d %d %d (%d %d %d %d) -> (%d %d %d %d)\n", i,dst2[i*step/2], dst[i*step/2],
src[0*step], 
src[1*step], 
src[2*step], 
src[3*step], dst[0*step], dst[1*step],dst[2*step],dst[3*step]);
 
  140         int s0=  src[0*4] + src[6*4];
 
  141         int s1=  src[1*4] + src[5*4];
 
  142         int s2=  src[2*4] + src[4*4];
 
  /*
   * MMX instruction sequence (the enclosing asm statement's opening and
   * closing lines are not visible in this listing).
   *
   * Operand %0 is src and operand %1 is dst, both passed in registers.
   * Each movq moves 8 bytes, i.e. four packed 16-bit words; "row k" below
   * means the quadword at byte offset k*4*2.
   *
   *   mm0 = row0 + row6                      (s0)
   *   mm1 = row1 + row5                      (s1)
   *   mm2 = row2 + row4                      (s2)
   *   mm3 = 2 * row3                         (s)
   *   a   = s - s0,  b = s + s0
   *   c   = s2 - s1, e = s2 + s1
   *   dst row0 = b + e
   *   dst row2 = b - e
   *   dst row1 = 2*a + c
   *   dst row3 = a - 2*c
   *
   * The first three sums are the same pairings as the scalar
   * src[0*4]+src[6*4], src[1*4]+src[5*4], src[2*4]+src[4*4] lines above.
   * All arithmetic is paddw/psubw, so each 16-bit lane wraps on overflow.
   */
  161         "movq  (%0), %%mm0      \n\t" 
  162         "movq  1*4*2(%0), %%mm1 \n\t" 
  163         "paddw 6*4*2(%0), %%mm0 \n\t" 
  164         "paddw 5*4*2(%0), %%mm1 \n\t" 
  165         "movq  2*4*2(%0), %%mm2 \n\t" 
  166         "movq  3*4*2(%0), %%mm3 \n\t" 
  167         "paddw 4*4*2(%0), %%mm2 \n\t" 
  168         "paddw %%mm3, %%mm3     \n\t"  
  169         "movq %%mm3, %%mm4      \n\t"  
  170         "psubw %%mm0, %%mm3     \n\t"  
  171         "paddw %%mm0, %%mm4     \n\t"  
  172         "movq %%mm2, %%mm0      \n\t"  
  173         "psubw %%mm1, %%mm2     \n\t"  
  174         "paddw %%mm1, %%mm0     \n\t"  
  175         "movq %%mm4, %%mm1      \n\t"  
  176         "psubw %%mm0, %%mm4     \n\t"  
  177         "paddw %%mm0, %%mm1     \n\t"  
  178         "movq %%mm3, %%mm0      \n\t"  
  179         "psubw %%mm2, %%mm3     \n\t" 
  180         "psubw %%mm2, %%mm3     \n\t" 
  181         "paddw %%mm0, %%mm2     \n\t" 
  182         "paddw %%mm0, %%mm2     \n\t" 
  183         "movq %%mm1, (%1)       \n\t" 
  184         "movq %%mm4, 2*4*2(%1)  \n\t" 
  185         "movq %%mm2, 1*4*2(%1)  \n\t" 
  186         "movq %%mm3, 3*4*2(%1)  \n\t" 
  187         :: 
"r" (
src), 
"r"(dst)
 
  /* Floating-point scale constants: SN1 equals sqrt(5) and SN2 equals
   * sqrt(10) to the printed precision. SN2, together with SN0 (defined on a
   * line not visible in this listing), appears in the N/(SNx*SNy) table
   * entries and in the thres2[qp][i] products further down. */
  198 #define SN1 2.2360679775 
  199 #define SN2 3.16227766017 
  204     N/(N1*
N0), 
N/(N1*N1), 
N/(N1*
N0),
N/(N1*N2),
 
  206     N/(N2*
N0), 
N/(N2*N1), 
N/(N2*
N0),
N/(N2*N2),
 
  211     N/(SN2*
SN0), 
N/(SN2*SN2), 
N/(SN2*
SN0),
N/(SN2*SN2),
 
  213     N/(SN2*
SN0), 
N/(SN2*SN2), 
N/(SN2*
SN0),
N/(SN2*SN2),
 
  222     for(qp=0; qp<99; qp++){
 
  224             thres2[qp][i]= ((i&1)?
SN2:
SN0) * ((i&4)?
SN2:SN0) * 
XMAX(1,qp) * (1<<2) - 1 - bias;
 
  233     a= src[0] * factor[0];
 
  235         unsigned int threshold1= thres2[qp][i];
 
  236         unsigned int threshold2= (threshold1<<1);
 
  238         if(((
unsigned)(level+threshold1))>threshold2){
 
  239             a += level * factor[i];
 
  242     return (a + (1<<11))>>12;
 
  249     a= src[0] * factor[0];
 
  251         unsigned int threshold1= thres2[qp][i];
 
  252         unsigned int threshold2= (threshold1<<1);
 
  254         if(((
unsigned)(level+threshold1))>threshold2){
 
  255             if(((
unsigned)(level+2*threshold1))>2*threshold2){
 
  256                 a += level * factor[i];
 
  258                 if(level>0) a+= 2*(level - (int)threshold1)*factor[i];
 
  259                 else        a+= 2*(level + (int)threshold1)*factor[i];
 
  263     return (a + (1<<11))>>12;
 
  270     a= src[0] * factor[0];
 
  272         unsigned int threshold1= thres2[qp][i];
 
  273         unsigned int threshold2= (threshold1<<1);
 
  275         if(((
unsigned)(level+threshold1))>threshold2){
 
  276             if(level>0) a+= (level - (int)threshold1)*factor[i];
 
  277             else        a+= (level + (int)threshold1)*factor[i];
 
  280     return (a + (1<<11))>>12;
 
  290     int16_t *
temp= (int16_t *)(p->
src + 32);
 
  292     if (!src || !dst) 
return; 
 
  295         fast_memcpy(p_src + index, src + y*src_stride, width);
 
  297             p_src[index         - x - 1]= p_src[index +         x    ];
 
  298             p_src[index + width + x    ]= p_src[index + width - x - 1];
 
  302         fast_memcpy(p_src + (       7-y)*stride, p_src + (       y+8)*stride, stride);
 
  303         fast_memcpy(p_src + (height+8+y)*stride, p_src + (height-y+7)*stride, stride);
 
  308         for(x=-8; x<0; x+=4){
 
  309             const int index= x + y*stride + (8-3)*(1+stride) + 8; 
 
  311             int16_t *tp= 
temp+4*x;
 
  313             dctA_c(tp+4*8, src, stride);
 
  316             const int qps= 3 + is_luma;
 
  323                 qp= qp_store[ (
XMIN(x, width-1)>>qps) + (
XMIN(y, height-1)>>qps) * qp_stride];
 
  327                 const int index= x + y*stride + (8-3)*(1+stride) + 8; 
 
  329                 int16_t *tp= 
temp+4*x;
 
  333                     dctA_c(tp+4*8, src, stride);
 
  338                 v= (v + 
dither[y&7][x&7])>>6;
 
  339                 if((
unsigned)v > 255)
 
  341                 dst[x + y*dst_stride]= 
v;
 
  347 static int config(
struct vf_instance *vf,
 
  349     unsigned int flags, 
unsigned int outfmt){
 
  350     int h= (height+16+15)&(~15);
 
  352     vf->priv->temp_stride= (width+16+15)&(~15);
 
  363     mpi->
planes[0]=vf->dmpi->planes[0];
 
  364     mpi->
stride[0]=vf->dmpi->stride[0];
 
  365     mpi->
width=vf->dmpi->width;
 
  367         mpi->
planes[1]=vf->dmpi->planes[1];
 
  368         mpi->
planes[2]=vf->dmpi->planes[2];
 
  369         mpi->
stride[1]=vf->dmpi->stride[1];
 
  370         mpi->
stride[2]=vf->dmpi->stride[2];
 
  390     if(mpi->
qscale || vf->priv->qp){
 
  410 static void uninit(
struct vf_instance *vf){
 
  411     if(!vf->priv) 
return;
 
  439 static int control(
struct vf_instance *vf, 
int request, 
void* 
data){
 
  487     "Michael Niedermayer",