00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include <stdio.h>
00023 #include <stdlib.h>
00024 #include <string.h>
00025 #include <inttypes.h>
00026 #include <math.h>
00027
00028 #include "config.h"
00029
00030 #include "mp_msg.h"
00031 #include "cpudetect.h"
00032
00033 #if HAVE_MALLOC_H
00034 #include <malloc.h>
00035 #endif
00036
00037 #include "libavutil/mem.h"
00038
00039 #include "img_format.h"
00040 #include "mp_image.h"
00041 #include "vf.h"
00042 #include "libvo/fastmemcpy.h"
00043
/* Smaller of two values. Each argument may be evaluated twice. */
#define XMIN(x, y) (((x) < (y)) ? (x) : (y))
/* Larger of two values. Each argument may be evaluated twice. */
#define XMAX(x, y) (((x) > (y)) ? (x) : (y))

/* Element type of the transform coefficient buffers used in this file. */
typedef short DCTELEM;
00048
00049
/*
 * 8x8 dither matrix holding each value 0..63 exactly once.
 * filter() adds dither[y&7][x&7] to the requantized value before the
 * final >>6.
 */
static const uint8_t __attribute__((aligned(8))) dither[8][8] = {
    {  0, 48, 12, 60,  3, 51, 15, 63 },
    { 32, 16, 44, 28, 35, 19, 47, 31 },
    {  8, 56,  4, 52, 11, 59,  7, 55 },
    { 40, 24, 36, 20, 43, 27, 39, 23 },
    {  2, 50, 14, 62,  1, 49, 13, 61 },
    { 34, 18, 46, 30, 33, 17, 45, 29 },
    { 10, 58,  6, 54,  9, 57,  5, 53 },
    { 42, 26, 38, 22, 41, 25, 37, 21 },
};
00060
/* Per-instance state of the filter. */
struct vf_priv_s {
    int qp;          /* quantizer from the filter arguments; non-zero overrides the per-block value in filter() */
    int mode;        /* threshold mode from the filter arguments; selects the requantize function in vf_open() */
    int mpeg2;       /* copy of mpi->qscale_type, passed to norm_qscale() */
    int temp_stride; /* row stride of the work buffer for the luma plane, set in config() */
    uint8_t *src;    /* work buffer allocated in config(): scratch coefficients followed by the padded plane copy */
};
/*
 * Disabled (#if 0) experimental helper: computes four transform outputs
 * from the already folded inputs s0..s3 and stores them at dst[0*step],
 * dst[1*step], dst[2*step] and dst[3*step].
 * Three alternative bodies are selected by the inner #if chain; only the
 * "#elif 1" branch would be compiled if the outer guard were enabled.
 * In that branch the locals d and dst2 are unused.
 */
00068 #if 0
00069 static inline void dct7_c(DCTELEM *dst, int s0, int s1, int s2, int s3, int step){
00070 int s, d;
00071 int dst2[64];
00072
/* Fixed-point constants, each scaled by 1024 and rounded to the nearest int. */
00073 #define C0 ((int)(1024*0.37796447300922719759+0.5)) //sqrt(1/7)
00074 #define C1 ((int)(1024*0.53452248382484879308/6+0.5)) //sqrt(2/7)/6
00075
00076 #define C2 ((int)(1024*0.45221175985034745004/2+0.5))
00077 #define C3 ((int)(1024*0.36264567479870879474/2+0.5))
00078
00079
00080 #define C4 ((int)(1024*0.1962505182412941918+0.5))
00081 #define C5 ((int)(1024*0.0149276808419397944+0.5))
00082
/* Variant 1 (disabled): fixed-point arithmetic with C0..C5; results are scaled back with >>10. */
00083 #if 0
00084 s= s0 + s1 + s2;
00085 dst[0*step] = ((s + s3)*C0 + 512) >> 10;
00086 s= (s - 6*s3)*C1 + 512;
00087 d= (s0-s2)*C4 + (s1-s2)*C5;
00088 dst[1*step] = (s + 2*d)>>10;
00089 s -= d;
00090 d= (s1-s0)*C2 + (s1-s2)*C3;
00091 dst[2*step] = (s + d)>>10;
00092 dst[3*step] = (s - d)>>10;
/* Variant 2 (selected): integer-only butterfly, the same arithmetic as in dctA_c() and dctB_c(). */
00093 #elif 1
00094 s = s3+s3;
00095 s3= s-s0;
00096 s0= s+s0;
00097 s = s2+s1;
00098 s2= s2-s1;
00099 dst[0*step]= s0 + s;
00100 dst[2*step]= s0 - s;
00101 dst[1*step]= 2*s3 + s2;
00102 dst[3*step]= s3 - 2*s2;
/*
 * Variant 3 (disabled): floating-point cosine sum compared against dst, with
 * a diagnostic printf when they differ by more than 20.
 * NOTE(review): it reads `src`, which this function does not declare, so this
 * branch would not compile as written.
 */
00103 #else
00104 int i,j,n=7;
00105 for(i=0; i<7; i+=2){
00106 dst2[i*step/2]= 0;
00107 for(j=0; j<4; j++)
00108 dst2[i*step/2] += src[j*step] * cos(i*M_PI/n*(j+0.5)) * sqrt((i?2.0:1.0)/n);
00109 if(fabs(dst2[i*step/2] - dst[i*step/2]) > 20)
00110 printf("%d %d %d (%d %d %d %d) -> (%d %d %d %d)\n", i,dst2[i*step/2], dst[i*step/2],src[0*step], src[1*step], src[2*step], src[3*step], dst[0*step], dst[1*step],dst[2*step],dst[3*step]);
00111 }
00112 #endif
00113 }
00114 #endif
00115
00116 static inline void dctA_c(DCTELEM *dst, uint8_t *src, int stride){
00117 int i;
00118
00119 for(i=0; i<4; i++){
00120 int s0= src[0*stride] + src[6*stride];
00121 int s1= src[1*stride] + src[5*stride];
00122 int s2= src[2*stride] + src[4*stride];
00123 int s3= src[3*stride];
00124 int s= s3+s3;
00125 s3= s-s0;
00126 s0= s+s0;
00127 s = s2+s1;
00128 s2= s2-s1;
00129 dst[0]= s0 + s;
00130 dst[2]= s0 - s;
00131 dst[1]= 2*s3 + s2;
00132 dst[3]= s3 - 2*s2;
00133 src++;
00134 dst+=4;
00135 }
00136 }
00137
00138 static void dctB_c(DCTELEM *dst, DCTELEM *src){
00139 int i;
00140
00141 for(i=0; i<4; i++){
00142 int s0= src[0*4] + src[6*4];
00143 int s1= src[1*4] + src[5*4];
00144 int s2= src[2*4] + src[4*4];
00145 int s3= src[3*4];
00146 int s= s3+s3;
00147 s3= s-s0;
00148 s0= s+s0;
00149 s = s2+s1;
00150 s2= s2-s1;
00151 dst[0*4]= s0 + s;
00152 dst[2*4]= s0 - s;
00153 dst[1*4]= 2*s3 + s2;
00154 dst[3*4]= s3 - 2*s2;
00155 src++;
00156 dst++;
00157 }
00158 }
00159
00160 #if HAVE_MMX
00161 static void dctB_mmx(DCTELEM *dst, DCTELEM *src){
00162 __asm__ volatile (
00163 "movq (%0), %%mm0 \n\t"
00164 "movq 1*4*2(%0), %%mm1 \n\t"
00165 "paddw 6*4*2(%0), %%mm0 \n\t"
00166 "paddw 5*4*2(%0), %%mm1 \n\t"
00167 "movq 2*4*2(%0), %%mm2 \n\t"
00168 "movq 3*4*2(%0), %%mm3 \n\t"
00169 "paddw 4*4*2(%0), %%mm2 \n\t"
00170 "paddw %%mm3, %%mm3 \n\t"
00171 "movq %%mm3, %%mm4 \n\t"
00172 "psubw %%mm0, %%mm3 \n\t"
00173 "paddw %%mm0, %%mm4 \n\t"
00174 "movq %%mm2, %%mm0 \n\t"
00175 "psubw %%mm1, %%mm2 \n\t"
00176 "paddw %%mm1, %%mm0 \n\t"
00177 "movq %%mm4, %%mm1 \n\t"
00178 "psubw %%mm0, %%mm4 \n\t"
00179 "paddw %%mm0, %%mm1 \n\t"
00180 "movq %%mm3, %%mm0 \n\t"
00181 "psubw %%mm2, %%mm3 \n\t"
00182 "psubw %%mm2, %%mm3 \n\t"
00183 "paddw %%mm0, %%mm2 \n\t"
00184 "paddw %%mm0, %%mm2 \n\t"
00185 "movq %%mm1, (%1) \n\t"
00186 "movq %%mm4, 2*4*2(%1) \n\t"
00187 "movq %%mm2, 1*4*2(%1) \n\t"
00188 "movq %%mm3, 3*4*2(%1) \n\t"
00189 :: "r" (src), "r"(dst)
00190 );
00191 }
00192 #endif
00193
00194 static void (*dctB)(DCTELEM *dst, DCTELEM *src)= dctB_c;
00195
/*
 * Scaling constants for the 4x4 coefficient block.
 * SN0, SN1 and SN2 are the square roots of N0, N1 and N2.
 * N is the fixed-point scale (1<<16) divided by the per-coefficient products.
 * SN1 is defined for completeness; the tables in this file use SN0 and SN2 only.
 */
#define N0 4
#define N1 5
#define N2 10
#define SN0 2
#define SN1 2.2360679775
#define SN2 3.16227766017
#define N (1<<16)

/* Table entry: fixed-point scale divided by the product of two constants. */
#define PP7_SCALED(a, b) (N/((a)*(b)))

/*
 * Weight applied to coefficient i when the requantize functions sum the
 * block; entry (row, col) = N/(Nrow*Ncol) with the per-index constants
 * N0, N1, N0, N2.
 */
static const int factor[16]={
    PP7_SCALED(N0,N0), PP7_SCALED(N0,N1), PP7_SCALED(N0,N0), PP7_SCALED(N0,N2),
    PP7_SCALED(N1,N0), PP7_SCALED(N1,N1), PP7_SCALED(N1,N0), PP7_SCALED(N1,N2),
    PP7_SCALED(N0,N0), PP7_SCALED(N0,N1), PP7_SCALED(N0,N0), PP7_SCALED(N0,N2),
    PP7_SCALED(N2,N0), PP7_SCALED(N2,N1), PP7_SCALED(N2,N0), PP7_SCALED(N2,N2),
};

/*
 * Static threshold table built the same way from SN0/SN2; the floating-point
 * quotients are truncated to int by the initializer.
 */
static const int thres[16]={
    PP7_SCALED(SN0,SN0), PP7_SCALED(SN0,SN2), PP7_SCALED(SN0,SN0), PP7_SCALED(SN0,SN2),
    PP7_SCALED(SN2,SN0), PP7_SCALED(SN2,SN2), PP7_SCALED(SN2,SN0), PP7_SCALED(SN2,SN2),
    PP7_SCALED(SN0,SN0), PP7_SCALED(SN0,SN2), PP7_SCALED(SN0,SN0), PP7_SCALED(SN0,SN2),
    PP7_SCALED(SN2,SN0), PP7_SCALED(SN2,SN2), PP7_SCALED(SN2,SN0), PP7_SCALED(SN2,SN2),
};

#undef PP7_SCALED
00217
00218 static int thres2[99][16];
00219
00220 static void init_thres2(void){
00221 int qp, i;
00222 int bias= 0;
00223
00224 for(qp=0; qp<99; qp++){
00225 for(i=0; i<16; i++){
00226 thres2[qp][i]= ((i&1)?SN2:SN0) * ((i&4)?SN2:SN0) * XMAX(1,qp) * (1<<2) - 1 - bias;
00227 }
00228 }
00229 }
00230
00231 static int hardthresh_c(DCTELEM *src, int qp){
00232 int i;
00233 int a;
00234
00235 a= src[0] * factor[0];
00236 for(i=1; i<16; i++){
00237 unsigned int threshold1= thres2[qp][i];
00238 unsigned int threshold2= (threshold1<<1);
00239 int level= src[i];
00240 if(((unsigned)(level+threshold1))>threshold2){
00241 a += level * factor[i];
00242 }
00243 }
00244 return (a + (1<<11))>>12;
00245 }
00246
00247 static int mediumthresh_c(DCTELEM *src, int qp){
00248 int i;
00249 int a;
00250
00251 a= src[0] * factor[0];
00252 for(i=1; i<16; i++){
00253 unsigned int threshold1= thres2[qp][i];
00254 unsigned int threshold2= (threshold1<<1);
00255 int level= src[i];
00256 if(((unsigned)(level+threshold1))>threshold2){
00257 if(((unsigned)(level+2*threshold1))>2*threshold2){
00258 a += level * factor[i];
00259 }else{
00260 if(level>0) a+= 2*(level - (int)threshold1)*factor[i];
00261 else a+= 2*(level + (int)threshold1)*factor[i];
00262 }
00263 }
00264 }
00265 return (a + (1<<11))>>12;
00266 }
00267
00268 static int softthresh_c(DCTELEM *src, int qp){
00269 int i;
00270 int a;
00271
00272 a= src[0] * factor[0];
00273 for(i=1; i<16; i++){
00274 unsigned int threshold1= thres2[qp][i];
00275 unsigned int threshold2= (threshold1<<1);
00276 int level= src[i];
00277 if(((unsigned)(level+threshold1))>threshold2){
00278 if(level>0) a+= (level - (int)threshold1)*factor[i];
00279 else a+= (level + (int)threshold1)*factor[i];
00280 }
00281 }
00282 return (a + (1<<11))>>12;
00283 }
00284
00285 static int (*requantize)(DCTELEM *src, int qp)= hardthresh_c;
00286
00287 static void filter(struct vf_priv_s *p, uint8_t *dst, uint8_t *src, int dst_stride, int src_stride, int width, int height, uint8_t *qp_store, int qp_stride, int is_luma){
00288 int x, y;
00289 const int stride= is_luma ? p->temp_stride : ((width+16+15)&(~15));
00290 uint8_t *p_src= p->src + 8*stride;
00291 DCTELEM *block= (DCTELEM *)p->src;
00292 DCTELEM *temp= (DCTELEM *)(p->src + 32);
00293
00294 if (!src || !dst) return;
00295 for(y=0; y<height; y++){
00296 int index= 8 + 8*stride + y*stride;
00297 fast_memcpy(p_src + index, src + y*src_stride, width);
00298 for(x=0; x<8; x++){
00299 p_src[index - x - 1]= p_src[index + x ];
00300 p_src[index + width + x ]= p_src[index + width - x - 1];
00301 }
00302 }
00303 for(y=0; y<8; y++){
00304 fast_memcpy(p_src + ( 7-y)*stride, p_src + ( y+8)*stride, stride);
00305 fast_memcpy(p_src + (height+8+y)*stride, p_src + (height-y+7)*stride, stride);
00306 }
00307
00308
00309 for(y=0; y<height; y++){
00310 for(x=-8; x<0; x+=4){
00311 const int index= x + y*stride + (8-3)*(1+stride) + 8;
00312 uint8_t *src = p_src + index;
00313 DCTELEM *tp= temp+4*x;
00314
00315 dctA_c(tp+4*8, src, stride);
00316 }
00317 for(x=0; x<width; ){
00318 const int qps= 3 + is_luma;
00319 int qp;
00320 int end= XMIN(x+8, width);
00321
00322 if(p->qp)
00323 qp= p->qp;
00324 else{
00325 qp= qp_store[ (XMIN(x, width-1)>>qps) + (XMIN(y, height-1)>>qps) * qp_stride];
00326 qp=norm_qscale(qp, p->mpeg2);
00327 }
00328 for(; x<end; x++){
00329 const int index= x + y*stride + (8-3)*(1+stride) + 8;
00330 uint8_t *src = p_src + index;
00331 DCTELEM *tp= temp+4*x;
00332 int v;
00333
00334 if((x&3)==0)
00335 dctA_c(tp+4*8, src, stride);
00336
00337 dctB(block, tp);
00338
00339 v= requantize(block, qp);
00340 v= (v + dither[y&7][x&7])>>6;
00341 if((unsigned)v > 255)
00342 v= (-v)>>31;
00343 dst[x + y*dst_stride]= v;
00344 }
00345 }
00346 }
00347 }
00348
00349 static int config(struct vf_instance *vf,
00350 int width, int height, int d_width, int d_height,
00351 unsigned int flags, unsigned int outfmt){
00352 int h= (height+16+15)&(~15);
00353
00354 vf->priv->temp_stride= (width+16+15)&(~15);
00355 vf->priv->src = av_malloc(vf->priv->temp_stride*(h+8)*sizeof(uint8_t));
00356
00357 return vf_next_config(vf,width,height,d_width,d_height,flags,outfmt);
00358 }
00359
00360 static void get_image(struct vf_instance *vf, mp_image_t *mpi){
00361 if(mpi->flags&MP_IMGFLAG_PRESERVE) return;
00362
00363 vf->dmpi=vf_get_image(vf->next,mpi->imgfmt,
00364 mpi->type, mpi->flags | MP_IMGFLAG_READABLE, mpi->width, mpi->height);
00365 mpi->planes[0]=vf->dmpi->planes[0];
00366 mpi->stride[0]=vf->dmpi->stride[0];
00367 mpi->width=vf->dmpi->width;
00368 if(mpi->flags&MP_IMGFLAG_PLANAR){
00369 mpi->planes[1]=vf->dmpi->planes[1];
00370 mpi->planes[2]=vf->dmpi->planes[2];
00371 mpi->stride[1]=vf->dmpi->stride[1];
00372 mpi->stride[2]=vf->dmpi->stride[2];
00373 }
00374 mpi->flags|=MP_IMGFLAG_DIRECT;
00375 }
00376
00377 static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts){
00378 mp_image_t *dmpi;
00379
00380 if(mpi->flags&MP_IMGFLAG_DIRECT){
00381 dmpi=vf->dmpi;
00382 }else{
00383
00384 dmpi=vf_get_image(vf->next,mpi->imgfmt,
00385 MP_IMGTYPE_TEMP,
00386 MP_IMGFLAG_ACCEPT_STRIDE|MP_IMGFLAG_PREFER_ALIGNED_STRIDE,
00387 mpi->width,mpi->height);
00388 vf_clone_mpi_attributes(dmpi, mpi);
00389 }
00390
00391 vf->priv->mpeg2= mpi->qscale_type;
00392 if(mpi->qscale || vf->priv->qp){
00393 filter(vf->priv, dmpi->planes[0], mpi->planes[0], dmpi->stride[0], mpi->stride[0], mpi->w, mpi->h, mpi->qscale, mpi->qstride, 1);
00394 filter(vf->priv, dmpi->planes[1], mpi->planes[1], dmpi->stride[1], mpi->stride[1], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, mpi->qscale, mpi->qstride, 0);
00395 filter(vf->priv, dmpi->planes[2], mpi->planes[2], dmpi->stride[2], mpi->stride[2], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, mpi->qscale, mpi->qstride, 0);
00396 }else{
00397 memcpy_pic(dmpi->planes[0], mpi->planes[0], mpi->w, mpi->h, dmpi->stride[0], mpi->stride[0]);
00398 memcpy_pic(dmpi->planes[1], mpi->planes[1], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, dmpi->stride[1], mpi->stride[1]);
00399 memcpy_pic(dmpi->planes[2], mpi->planes[2], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, dmpi->stride[2], mpi->stride[2]);
00400 }
00401
00402 #if HAVE_MMX
00403 if(gCpuCaps.hasMMX) __asm__ volatile ("emms\n\t");
00404 #endif
00405 #if HAVE_MMX2
00406 if(gCpuCaps.hasMMX2) __asm__ volatile ("sfence\n\t");
00407 #endif
00408
00409 return vf_next_put_image(vf,dmpi, pts);
00410 }
00411
00412 static void uninit(struct vf_instance *vf){
00413 if(!vf->priv) return;
00414
00415 av_free(vf->priv->src);
00416 vf->priv->src= NULL;
00417
00418 free(vf->priv);
00419 vf->priv=NULL;
00420 }
00421
00422
00423 static int query_format(struct vf_instance *vf, unsigned int fmt){
00424 switch(fmt){
00425 case IMGFMT_YVU9:
00426 case IMGFMT_IF09:
00427 case IMGFMT_YV12:
00428 case IMGFMT_I420:
00429 case IMGFMT_IYUV:
00430 case IMGFMT_CLPL:
00431 case IMGFMT_Y800:
00432 case IMGFMT_Y8:
00433 case IMGFMT_444P:
00434 case IMGFMT_422P:
00435 case IMGFMT_411P:
00436 return vf_next_query_format(vf,fmt);
00437 }
00438 return 0;
00439 }
00440
/**
 * Control callback: this filter handles no requests itself and forwards
 * every one to the next filter.
 *
 * @return the result of vf_next_control()
 */
static int control(struct vf_instance *vf, int request, void* data){
    const int result = vf_next_control(vf, request, data);

    return result;
}
00444
00445 static int vf_open(vf_instance_t *vf, char *args){
00446 vf->config=config;
00447 vf->put_image=put_image;
00448 vf->get_image=get_image;
00449 vf->query_format=query_format;
00450 vf->uninit=uninit;
00451 vf->control= control;
00452 vf->priv=malloc(sizeof(struct vf_priv_s));
00453 memset(vf->priv, 0, sizeof(struct vf_priv_s));
00454
00455 if (args) sscanf(args, "%d:%d", &vf->priv->qp, &vf->priv->mode);
00456
00457 if(vf->priv->qp < 0)
00458 vf->priv->qp = 0;
00459
00460 init_thres2();
00461
00462 switch(vf->priv->mode){
00463 case 0: requantize= hardthresh_c; break;
00464 case 1: requantize= softthresh_c; break;
00465 default:
00466 case 2: requantize= mediumthresh_c; break;
00467 }
00468
00469 #if HAVE_MMX
00470 if(gCpuCaps.hasMMX){
00471 dctB= dctB_mmx;
00472 }
00473 #endif
00474 #if 0
00475 if(gCpuCaps.hasMMX){
00476 switch(vf->priv->mode){
00477 case 0: requantize= hardthresh_mmx; break;
00478 case 1: requantize= softthresh_mmx; break;
00479 }
00480 }
00481 #endif
00482
00483 return 1;
00484 }
00485
00486 const vf_info_t vf_info_pp7 = {
00487 "postprocess 7",
00488 "pp7",
00489 "Michael Niedermayer",
00490 "",
00491 vf_open,
00492 NULL
00493 };