// XMIN / XMAX: expand to a conditional expression that yields the smaller /
// larger of the two arguments. Each argument appears twice in the expansion,
// so an argument with side effects (e.g. i++) may be evaluated twice.
44 #define XMIN(a,b) ((a) < (b) ? (a) : (b))
45 #define XMAX(a,b) ((a) > (b) ? (a) : (b))
// Eight rows of eight entries; taken together the 64 visible values are the
// integers 0..63, each appearing exactly once.
// NOTE(review): the declaration these rows belong to is not visible here;
// presumably this is the `dither` table that is read as dither[y&7][x&7]
// elsewhere in the file -- confirm.
49 { 0, 48, 12, 60, 3, 51, 15, 63, },
50 { 32, 16, 44, 28, 35, 19, 47, 31, },
51 { 8, 56, 4, 52, 11, 59, 7, 55, },
52 { 40, 24, 36, 20, 43, 27, 39, 23, },
53 { 2, 50, 14, 62, 1, 49, 13, 61, },
54 { 34, 18, 46, 30, 33, 17, 45, 29, },
55 { 10, 58, 6, 54, 9, 57, 5, 53, },
56 { 42, 26, 38, 22, 41, 25, 37, 21, },
// dct7_c: stores four results through dst at element offsets 0, step, 2*step
// and 3*step, derived from the integer inputs s0..s3.
// NOTE(review): only part of the body is visible in this view -- the
// declarations of s, d, dst2, i, j, n and the lines that separate the
// alternative code paths below are missing, so the notes here describe the
// visible statements only.
67 static inline void dct7_c(int16_t *dst,
int s0,
int s1,
int s2,
int s3,
int step){
// Fixed-point coefficients: a real constant times 1024, with +0.5 so that the
// truncating cast to int rounds to nearest.
71 #define C0 ((int)(1024*0.37796447300922719759+0.5)) //sqrt(1/7)
72 #define C1 ((int)(1024*0.53452248382484879308/6+0.5)) //sqrt(2/7)/6
74 #define C2 ((int)(1024*0.45221175985034745004/2+0.5))
75 #define C3 ((int)(1024*0.36264567479870879474/2+0.5))
78 #define C4 ((int)(1024*0.1962505182412941918+0.5))
79 #define C5 ((int)(1024*0.0149276808419397944+0.5))
// +512 followed by >>10 removes the 1024 scale with rounding.
83 dst[0*step] = ((s +
s3)*C0 + 512) >> 10;
// The 512 rounding term is folded into s here; the >>10 shifts on the visible
// lines below add no bias of their own.
84 s= (s - 6*
s3)*
C1 + 512;
85 d= (s0-
s2)*
C4 + (s1-s2)*
C5;
86 dst[1*step] = (s + 2*d)>>10;
88 d= (s1-
s0)*
C2 + (s1-s2)*
C3;
// Outputs 2 and 3 are the sum and the difference of the same s and d.
89 dst[2*step] = (s + d)>>10;
90 dst[3*step] = (s - d)>>10;
// Integer-only assignments to outputs 1 and 3 (no coefficient multiply, no
// shift). NOTE(review): presumably an alternative to the fixed-point path
// above; the preprocessor lines selecting between them are not visible.
99 dst[1*step]= 2*s3 +
s2;
100 dst[3*step]= s3 - 2*
s2;
// Floating-point reference: accumulates src[j*step] * cos(i*pi/n*(j+0.5)) *
// sqrt((i ? 2 : 1)/n) into dst2[i*step/2].
// NOTE(review): `src` is not among the parameters in the visible signature --
// confirm where it is declared and whether this path is ever compiled.
106 dst2[i*step/2] +=
src[j*step] * cos(i*
M_PI/n*(j+0.5)) * sqrt((i?2.0:1.0)/
n);
// Report any output that differs from the reference by more than 20.
107 if(fabs(dst2[i*step/2] - dst[i*step/2]) > 20)
108 printf(
"%d %d %d (%d %d %d %d) -> (%d %d %d %d)\n", i,dst2[i*step/2], dst[i*step/2],
src[0*step],
src[1*step],
src[2*step],
src[3*step], dst[0*step], dst[1*step],dst[2*step],dst[3*step]);
// Pair up the samples that sit symmetrically around src[3*stride]
// (0 with 6, 1 with 5, 2 with 4); the centre sample is taken on its own.
118 int s0= src[0*stride] + src[6*stride];
119 int s1= src[1*stride] + src[5*stride];
120 int s2= src[2*stride] + src[4*stride];
121 int s3= src[3*stride];
// Same symmetric pairing as a 7-tap fold (0 with 6, 1 with 5, 2 with 4), here
// with a fixed element spacing of 4 between taps.
140 int s0= src[0*4] + src[6*4];
141 int s1= src[1*4] + src[5*4];
142 int s2= src[2*4] + src[4*4];
// Register flow of the visible instructions. Each movq moves 8 bytes and
// paddw/psubw work on 16-bit words, so four columns are processed at once;
// input row k is at byte offset k*4*2 from %0.
//   mm0 = row0 + row6, mm1 = row1 + row5, mm2 = row2 + row4, mm3 = row3.
161 "movq (%0), %%mm0 \n\t"
162 "movq 1*4*2(%0), %%mm1 \n\t"
163 "paddw 6*4*2(%0), %%mm0 \n\t"
164 "paddw 5*4*2(%0), %%mm1 \n\t"
165 "movq 2*4*2(%0), %%mm2 \n\t"
166 "movq 3*4*2(%0), %%mm3 \n\t"
167 "paddw 4*4*2(%0), %%mm2 \n\t"
// mm3 is doubled (2*row3); then mm3 = 2*row3 - mm0 and mm4 = 2*row3 + mm0.
168 "paddw %%mm3, %%mm3 \n\t"
169 "movq %%mm3, %%mm4 \n\t"
170 "psubw %%mm0, %%mm3 \n\t"
171 "paddw %%mm0, %%mm4 \n\t"
// mm0 = mm2 + mm1 and mm2 = mm2 - mm1.
172 "movq %%mm2, %%mm0 \n\t"
173 "psubw %%mm1, %%mm2 \n\t"
174 "paddw %%mm1, %%mm0 \n\t"
// mm1 = mm4 + mm0 and mm4 = mm4 - mm0: sum and difference of the two partial sums.
175 "movq %%mm4, %%mm1 \n\t"
176 "psubw %%mm0, %%mm4 \n\t"
177 "paddw %%mm0, %%mm1 \n\t"
// With t = mm3 and u = mm2 at this point: mm3 becomes t - 2*u and mm2 becomes
// u + 2*t (the doubling is done by repeating the subtract / add).
178 "movq %%mm3, %%mm0 \n\t"
179 "psubw %%mm2, %%mm3 \n\t"
180 "psubw %%mm2, %%mm3 \n\t"
181 "paddw %%mm0, %%mm2 \n\t"
182 "paddw %%mm0, %%mm2 \n\t"
// Results are written to output rows 0, 2, 1 and 3 (byte offset k*4*2 from %1).
183 "movq %%mm1, (%1) \n\t"
184 "movq %%mm4, 2*4*2(%1) \n\t"
185 "movq %%mm2, 1*4*2(%1) \n\t"
186 "movq %%mm3, 3*4*2(%1) \n\t"
// No output operands; %0 is src and %1 is dst, both passed in registers as inputs.
// NOTE(review): the asm stores through %1 and overwrites mm0-mm4; the clobber
// list, if there is one, is outside this view -- confirm it covers them.
187 ::
"r" (
src),
"r"(dst)
// Numerically these are sqrt(5) and sqrt(10) to the digits given.
198 #define SN1 2.2360679775
199 #define SN2 3.16227766017
204 N/(N1*
N0),
N/(N1*N1),
N/(N1*
N0),
N/(N1*N2),
206 N/(N2*
N0),
N/(N2*N1),
N/(N2*
N0),
N/(N2*N2),
211 N/(SN2*
SN0),
N/(SN2*SN2),
N/(SN2*
SN0),
N/(SN2*SN2),
213 N/(SN2*
SN0),
N/(SN2*SN2),
N/(SN2*
SN0),
N/(SN2*SN2),
// The visible loop covers qp = 0..98.
222 for(qp=0; qp<99; qp++){
// Bit 0 and bit 2 of i each pick SN2 (bit set) or SN0 (bit clear). The product
// is scaled by XMAX(1,qp) -- so qp 0 is treated like qp 1 -- and by 1<<2, then
// reduced by 1 and by bias.
224 thres2[qp][i]= ((i&1)?
SN2:
SN0) * ((i&4)?
SN2:SN0) *
XMAX(1,qp) * (1<<2) - 1 - bias;
// The accumulator starts with the src[0] term weighted by factor[0].
233 a= src[0] * factor[0];
235 unsigned int threshold1= thres2[qp][i];
236 unsigned int threshold2= (threshold1<<1);
// One unsigned comparison stands in for a two-sided range test: adding
// threshold1 maps [-threshold1, threshold1] onto [0, threshold2], and a more
// negative level wraps to a large unsigned value, so the test passes only when
// level lies outside that range.
// NOTE(review): the declaration of level is not visible; this reading assumes
// it is a signed int -- confirm.
238 if(((
unsigned)(level+threshold1))>threshold2){
// A coefficient that passes the test contributes at its full value.
239 a += level * factor[i];
// Adding half of 1<<12 before the shift rounds the scaled sum.
242 return (a + (1<<11))>>12;
// The accumulator starts with the src[0] term weighted by factor[0].
249 a= src[0] * factor[0];
251 unsigned int threshold1= thres2[qp][i];
252 unsigned int threshold2= (threshold1<<1);
// Unsigned-compare range test: passes only when level is outside
// [-threshold1, threshold1] (assuming level is a signed int; its declaration
// is not visible here -- confirm).
254 if(((
unsigned)(level+threshold1))>threshold2){
// Same test with both bounds doubled: passes when level is outside
// [-2*threshold1, 2*threshold1]; such coefficients contribute at full value.
255 if(((
unsigned)(level+2*threshold1))>2*threshold2){
256 a += level * factor[i];
// Contribution of twice the amount by which level exceeds threshold1, keeping
// the sign; this equals level itself when |level| == 2*threshold1.
// NOTE(review): the line separating this from the branch above is not visible
// in this view; presumably it is the else of the doubled test -- confirm.
258 if(level>0) a+= 2*(level - (int)threshold1)*factor[i];
259 else a+= 2*(level + (int)threshold1)*factor[i];
// Adding half of 1<<12 before the shift rounds the scaled sum.
263 return (a + (1<<11))>>12;
// The accumulator starts with the src[0] term weighted by factor[0].
270 a= src[0] * factor[0];
272 unsigned int threshold1= thres2[qp][i];
273 unsigned int threshold2= (threshold1<<1);
// Unsigned-compare range test: passes only when level is outside
// [-threshold1, threshold1] (assuming level is a signed int; its declaration
// is not visible here -- confirm).
275 if(((
unsigned)(level+threshold1))>threshold2){
// Passing coefficients are pulled towards zero by threshold1 before being
// weighted: positive levels have it subtracted, the others have it added.
276 if(level>0) a+= (level - (int)threshold1)*factor[i];
277 else a+= (level + (int)threshold1)*factor[i];
// Adding half of 1<<12 before the shift rounds the scaled sum.
280 return (a + (1<<11))>>12;
// temp aliases the buffer at p->src advanced by 32 elements (of p->src's own
// element type, which is not visible here) and is accessed as int16_t.
// NOTE(review): the enclosing function header, the loop headers and several
// declarations are missing from this view; the notes describe visible lines only.
290 int16_t *
temp= (int16_t *)(p->
src + 32);
// Nothing is done when either buffer pointer is null.
292 if (!src || !dst)
return;
// Row y of the input is copied to row y+8, column 8 of p_src, leaving room for
// a border above and to the left.
294 int index= 8 + 8*stride + y*stride;
295 fast_memcpy(p_src + index, src + y*src_stride, width);
// Mirror pixels across the left edge and across the right edge of the copied row.
297 p_src[index - x - 1]= p_src[index + x ];
298 p_src[index + width + x ]= p_src[index + width - x - 1];
// Mirror whole rows across the top edge (row 7-y from row y+8) and across the
// bottom edge (row height+8+y from row height-y+7).
302 fast_memcpy(p_src + ( 7-y)*stride, p_src + ( y+8)*stride, stride);
303 fast_memcpy(p_src + (height+8+y)*stride, p_src + (height-y+7)*stride, stride);
// Runs for x = -8 and x = -4, i.e. before the first output column.
308 for(x=-8; x<0; x+=4){
309 const int index= x + y*stride + (8-3)*(1+stride) + 8;
311 int16_t *tp=
temp+4*x;
// The +4*8 offsets the negative x, so the write lands at temp + 4*(x+8).
313 dctA_c(tp+4*8, src, stride);
// Shift applied to coordinates for the qp lookup: 3, plus the value of is_luma.
316 const int qps= 3 + is_luma;
// qp is read from qp_store using x and y clamped to the last column / row and
// scaled down by qps.
323 qp= qp_store[ (
XMIN(x, width-1)>>qps) + (
XMIN(y, height-1)>>qps) * qp_stride];
327 const int index= x + y*stride + (8-3)*(1+stride) + 8;
329 int16_t *tp=
temp+4*x;
333 dctA_c(tp+4*8, src, stride);
// Add the dither entry chosen by the low three bits of y and x, then drop six
// fractional bits.
338 v= (v +
dither[y&7][x&7])>>6;
// True for any v outside 0..255 (a negative v becomes a large unsigned value).
// NOTE(review): the statement this condition guards is not visible here.
339 if((
unsigned)v > 255)
341 dst[x + y*dst_stride]=
v;
// NOTE(review): part of the parameter list and most of the body are not
// visible in this view.
347 static int config(
struct vf_instance *vf,
349 unsigned int flags,
unsigned int outfmt){
// height + 16, rounded up to a multiple of 16.
350 int h= (height+16+15)&(~15);
// width + 16, rounded up to a multiple of 16, stored in priv->temp_stride.
352 vf->priv->temp_stride= (width+16+15)&(~15);
// Make mpi use the same plane 0 pointer, plane 0 stride and width as vf->dmpi.
// NOTE(review): any condition guarding these assignments is not visible here.
363 mpi->
planes[0]=vf->dmpi->planes[0];
364 mpi->
stride[0]=vf->dmpi->stride[0];
365 mpi->
width=vf->dmpi->width;
// Likewise for the plane 1 and plane 2 pointers and strides.
367 mpi->
planes[1]=vf->dmpi->planes[1];
368 mpi->
planes[2]=vf->dmpi->planes[2];
369 mpi->
stride[1]=vf->dmpi->stride[1];
370 mpi->
stride[2]=vf->dmpi->stride[2];
390 if(mpi->
qscale || vf->priv->qp){
// uninit: returns immediately when vf->priv is null.
// NOTE(review): the rest of the body is not visible in this view.
410 static void uninit(
struct vf_instance *vf){
411 if(!vf->priv)
return;
439 static int control(
struct vf_instance *vf,
int request,
void*
data){
487 "Michael Niedermayer",