/* SWAR byte-broadcast constants: ~0UL/255 is 0x0101…01 (one 0x01 per byte
 * of unsigned long), so multiplying by 0x7f / 0x80 replicates that byte
 * into every byte lane of the word, on both 32- and 64-bit targets. */
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
73 0, 8, 1, 9, 16, 24, 2, 10,
74 17, 25, 32, 40, 48, 56, 33, 41,
75 18, 26, 3, 11, 4, 12, 19, 27,
76 34, 42, 49, 57, 50, 58, 35, 43,
77 20, 28, 5, 13, 6, 14, 21, 29,
78 36, 44, 51, 59, 52, 60, 37, 45,
79 22, 30, 7, 15, 23, 31, 38, 46,
80 53, 61, 54, 62, 39, 47, 55, 63,
87 0, 1, 2, 3, 8, 9, 16, 17,
88 10, 11, 4, 5, 6, 7, 15, 14,
89 13, 12, 19, 18, 24, 25, 32, 33,
90 26, 27, 20, 21, 22, 23, 28, 29,
91 30, 31, 34, 35, 40, 41, 48, 49,
92 42, 43, 36, 37, 38, 39, 44, 45,
93 46, 47, 50, 51, 56, 57, 58, 59,
94 52, 53, 54, 55, 60, 61, 62, 63,
98 0, 8, 16, 24, 1, 9, 2, 10,
99 17, 25, 32, 40, 48, 56, 57, 49,
100 41, 33, 26, 18, 3, 11, 4, 12,
101 19, 27, 34, 42, 50, 58, 35, 43,
102 51, 59, 20, 28, 5, 13, 6, 14,
103 21, 29, 36, 44, 52, 60, 37, 45,
104 53, 61, 22, 30, 7, 15, 23, 31,
105 38, 46, 54, 62, 39, 47, 55, 63,
110 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
111 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
112 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
113 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
114 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
115 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
116 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
117 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
130 j = src_scantable[i];
144 int idct_permutation_type)
148 switch(idct_permutation_type){
151 idct_permutation[i]= i;
155 idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
163 idct_permutation[i]= ((i&7)<<3) | (i>>3);
167 idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
183 for (i = 0; i < 16; i++) {
184 for (j = 0; j < 16; j += 8) {
195 pix += line_size - 16;
206 for (i = 0; i < 16; i++) {
207 for (j = 0; j < 16; j += 8) {
219 register uint64_t x=*(uint64_t*)pix;
221 s += sq[(x>>8)&0xff];
222 s += sq[(x>>16)&0xff];
223 s += sq[(x>>24)&0xff];
224 s += sq[(x>>32)&0xff];
225 s += sq[(x>>40)&0xff];
226 s += sq[(x>>48)&0xff];
227 s += sq[(x>>56)&0xff];
229 register uint32_t x=*(uint32_t*)pix;
231 s += sq[(x>>8)&0xff];
232 s += sq[(x>>16)&0xff];
233 s += sq[(x>>24)&0xff];
234 x=*(uint32_t*)(pix+4);
236 s += sq[(x>>8)&0xff];
237 s += sq[(x>>16)&0xff];
238 s += sq[(x>>24)&0xff];
243 pix += line_size - 16;
251 for(i=0; i+8<=w; i+=8){
278 for (i = 0; i < h; i++) {
279 s += sq[pix1[0] - pix2[0]];
280 s += sq[pix1[1] - pix2[1]];
281 s += sq[pix1[2] - pix2[2]];
282 s += sq[pix1[3] - pix2[3]];
295 for (i = 0; i < h; i++) {
296 s += sq[pix1[0] - pix2[0]];
297 s += sq[pix1[1] - pix2[1]];
298 s += sq[pix1[2] - pix2[2]];
299 s += sq[pix1[3] - pix2[3]];
300 s += sq[pix1[4] - pix2[4]];
301 s += sq[pix1[5] - pix2[5]];
302 s += sq[pix1[6] - pix2[6]];
303 s += sq[pix1[7] - pix2[7]];
316 for (i = 0; i < h; i++) {
317 s += sq[pix1[ 0] - pix2[ 0]];
318 s += sq[pix1[ 1] - pix2[ 1]];
319 s += sq[pix1[ 2] - pix2[ 2]];
320 s += sq[pix1[ 3] - pix2[ 3]];
321 s += sq[pix1[ 4] - pix2[ 4]];
322 s += sq[pix1[ 5] - pix2[ 5]];
323 s += sq[pix1[ 6] - pix2[ 6]];
324 s += sq[pix1[ 7] - pix2[ 7]];
325 s += sq[pix1[ 8] - pix2[ 8]];
326 s += sq[pix1[ 9] - pix2[ 9]];
327 s += sq[pix1[10] - pix2[10]];
328 s += sq[pix1[11] - pix2[11]];
329 s += sq[pix1[12] - pix2[12]];
330 s += sq[pix1[13] - pix2[13]];
331 s += sq[pix1[14] - pix2[14]];
332 s += sq[pix1[15] - pix2[15]];
346 block[0] = s1[0] - s2[0];
347 block[1] = s1[1] - s2[1];
348 block[2] = s1[2] - s2[2];
349 block[3] = s1[3] - s2[3];
350 block[4] = s1[4] - s2[4];
351 block[5] = s1[5] - s2[5];
352 block[6] = s1[6] - s2[6];
353 block[7] = s1[7] - s2[7];
367 pixels[0] = av_clip_uint8(block[0]);
368 pixels[1] = av_clip_uint8(block[1]);
369 pixels[2] = av_clip_uint8(block[2]);
370 pixels[3] = av_clip_uint8(block[3]);
371 pixels[4] = av_clip_uint8(block[4]);
372 pixels[5] = av_clip_uint8(block[5]);
373 pixels[6] = av_clip_uint8(block[6]);
374 pixels[7] = av_clip_uint8(block[7]);
388 pixels[0] = av_clip_uint8(block[0]);
389 pixels[1] = av_clip_uint8(block[1]);
390 pixels[2] = av_clip_uint8(block[2]);
391 pixels[3] = av_clip_uint8(block[3]);
405 pixels[0] = av_clip_uint8(block[0]);
406 pixels[1] = av_clip_uint8(block[1]);
419 for (i = 0; i < 8; i++) {
420 for (j = 0; j < 8; j++) {
423 else if (*block > 127)
426 *pixels = (
uint8_t)(*block + 128);
430 pixels += (line_size - 8);
441 pixels[0] += block[0];
442 pixels[1] += block[1];
443 pixels[2] += block[2];
444 pixels[3] += block[3];
445 pixels[4] += block[4];
446 pixels[5] += block[5];
447 pixels[6] += block[6];
448 pixels[7] += block[7];
461 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
462 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
463 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
464 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
465 pixels[4] = av_clip_uint8(pixels[4] + block[4]);
466 pixels[5] = av_clip_uint8(pixels[5] + block[5]);
467 pixels[6] = av_clip_uint8(pixels[6] + block[6]);
468 pixels[7] = av_clip_uint8(pixels[7] + block[7]);
481 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
482 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
483 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
484 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
497 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
498 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
508 sum+=
FFABS(block[i]);
516 for (i = 0; i < h; i++) {
517 memset(block, value, 16);
526 for (i = 0; i < h; i++) {
527 memset(block, value, 8);
/* Rounded 2- and 4-tap pixel averages used by the interpolation code.
 * Every argument is parenthesized in the expansion so that arguments
 * containing lower-precedence operators (e.g. avg2(x|y, z)) still expand
 * correctly; each argument is evaluated exactly once. */
#define avg2(a, b)       (((a) + (b) + 1) >> 1)
#define avg4(a, b, c, d) (((a) + (b) + (c) + (d) + 2) >> 2)
537 const int A=(16-x16)*(16-y16);
538 const int B=( x16)*(16-y16);
539 const int C=(16-x16)*( y16);
540 const int D=( x16)*( y16);
545 dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] +
rounder)>>8;
546 dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] +
rounder)>>8;
547 dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] +
rounder)>>8;
548 dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] +
rounder)>>8;
549 dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] +
rounder)>>8;
550 dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] +
rounder)>>8;
551 dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] +
rounder)>>8;
552 dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] +
rounder)>>8;
562 const int s= 1<<
shift;
573 int src_x, src_y, frac_x, frac_y,
index;
582 if((
unsigned)src_x <
width){
583 if((
unsigned)src_y <
height){
584 index= src_x + src_y*
stride;
585 dst[y*stride + x]= ( ( src[
index ]*(s-frac_x)
586 + src[index +1]* frac_x )*(s-frac_y)
587 + ( src[index+stride ]*(s-frac_x)
588 + src[index+stride+1]* frac_x )* frac_y
591 index= src_x + av_clip(src_y, 0, height)*
stride;
592 dst[y*stride + x]= ( ( src[
index ]*(s-frac_x)
593 + src[index +1]* frac_x )*s
597 if((
unsigned)src_y <
height){
598 index= av_clip(src_x, 0, width) + src_y*
stride;
599 dst[y*stride + x]= ( ( src[
index ]*(s-frac_y)
600 + src[index+stride ]* frac_y )*s
603 index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*
stride;
604 dst[y*stride + x]= src[
index ];
618 case 2: put_pixels2_8_c (dst, src, stride, height);
break;
619 case 4: put_pixels4_8_c (dst, src, stride, height);
break;
620 case 8: put_pixels8_8_c (dst, src, stride, height);
break;
621 case 16:put_pixels16_8_c(dst, src, stride, height);
break;
627 for (i=0; i <
height; i++) {
628 for (j=0; j <
width; j++) {
629 dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
638 for (i=0; i <
height; i++) {
639 for (j=0; j <
width; j++) {
640 dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
649 for (i=0; i <
height; i++) {
650 for (j=0; j <
width; j++) {
651 dst[j] = (683*(2*src[j] + src[j+
stride] + 1)) >> 11;
660 for (i=0; i <
height; i++) {
661 for (j=0; j <
width; j++) {
662 dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+
stride] + 2*src[j+stride+1] + 6)) >> 15;
671 for (i=0; i <
height; i++) {
672 for (j=0; j <
width; j++) {
673 dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+
stride] + 3*src[j+stride+1] + 6)) >> 15;
682 for (i=0; i <
height; i++) {
683 for (j=0; j <
width; j++) {
684 dst[j] = (683*(src[j] + 2*src[j+
stride] + 1)) >> 11;
693 for (i=0; i <
height; i++) {
694 for (j=0; j <
width; j++) {
695 dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+
stride] + 3*src[j+stride+1] + 6)) >> 15;
704 for (i=0; i <
height; i++) {
705 for (j=0; j <
width; j++) {
706 dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+
stride] + 4*src[j+stride+1] + 6)) >> 15;
715 case 2: avg_pixels2_8_c (dst, src, stride, height);
break;
716 case 4: avg_pixels4_8_c (dst, src, stride, height);
break;
717 case 8: avg_pixels8_8_c (dst, src, stride, height);
break;
718 case 16:avg_pixels16_8_c(dst, src, stride, height);
break;
724 for (i=0; i <
height; i++) {
725 for (j=0; j <
width; j++) {
726 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
735 for (i=0; i <
height; i++) {
736 for (j=0; j <
width; j++) {
737 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
746 for (i=0; i <
height; i++) {
747 for (j=0; j <
width; j++) {
748 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+
stride] + 1)) >> 11) + 1) >> 1;
757 for (i=0; i <
height; i++) {
758 for (j=0; j <
width; j++) {
759 dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+
stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
768 for (i=0; i <
height; i++) {
769 for (j=0; j <
width; j++) {
770 dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+
stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
779 for (i=0; i <
height; i++) {
780 for (j=0; j <
width; j++) {
781 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+
stride] + 1)) >> 11) + 1) >> 1;
790 for (i=0; i <
height; i++) {
791 for (j=0; j <
width; j++) {
792 dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+
stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
801 for (i=0; i <
height; i++) {
802 for (j=0; j <
width; j++) {
803 dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+
stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
810 #define QPEL_MC(r, OPNAME, RND, OP) \
811 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
812 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
816 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
817 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
818 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
819 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
820 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
821 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
822 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
823 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
829 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
831 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
835 const int src0= src[0*srcStride];\
836 const int src1= src[1*srcStride];\
837 const int src2= src[2*srcStride];\
838 const int src3= src[3*srcStride];\
839 const int src4= src[4*srcStride];\
840 const int src5= src[5*srcStride];\
841 const int src6= src[6*srcStride];\
842 const int src7= src[7*srcStride];\
843 const int src8= src[8*srcStride];\
844 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
845 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
846 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
847 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
848 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
849 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
850 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
851 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
857 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
858 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
863 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
864 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
865 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
866 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
867 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
868 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
869 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
870 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
871 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
872 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
873 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
874 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
875 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
876 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
877 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
878 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
884 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
885 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
890 const int src0= src[0*srcStride];\
891 const int src1= src[1*srcStride];\
892 const int src2= src[2*srcStride];\
893 const int src3= src[3*srcStride];\
894 const int src4= src[4*srcStride];\
895 const int src5= src[5*srcStride];\
896 const int src6= src[6*srcStride];\
897 const int src7= src[7*srcStride];\
898 const int src8= src[8*srcStride];\
899 const int src9= src[9*srcStride];\
900 const int src10= src[10*srcStride];\
901 const int src11= src[11*srcStride];\
902 const int src12= src[12*srcStride];\
903 const int src13= src[13*srcStride];\
904 const int src14= src[14*srcStride];\
905 const int src15= src[15*srcStride];\
906 const int src16= src[16*srcStride];\
907 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
908 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
909 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
910 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
911 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
912 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
913 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
914 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
915 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
916 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
917 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
918 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
919 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
920 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
921 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
922 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
928 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
930 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
931 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
934 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
935 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
938 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
940 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
941 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
944 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
947 copy_block9(full, src, 16, stride, 9);\
948 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
949 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
952 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
954 copy_block9(full, src, 16, stride, 9);\
955 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
958 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
961 copy_block9(full, src, 16, stride, 9);\
962 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
963 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
965 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
970 copy_block9(full, src, 16, stride, 9);\
971 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
972 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
973 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
974 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
976 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
980 copy_block9(full, src, 16, stride, 9);\
981 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
982 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
983 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
984 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
986 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
991 copy_block9(full, src, 16, stride, 9);\
992 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
993 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
994 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
995 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
997 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1000 uint8_t halfHV[64];\
1001 copy_block9(full, src, 16, stride, 9);\
1002 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1003 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1004 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1005 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
1007 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1008 uint8_t full[16*9];\
1011 uint8_t halfHV[64];\
1012 copy_block9(full, src, 16, stride, 9);\
1013 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1014 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1015 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1016 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1018 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1019 uint8_t full[16*9];\
1021 uint8_t halfHV[64];\
1022 copy_block9(full, src, 16, stride, 9);\
1023 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1024 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1025 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1026 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1028 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1029 uint8_t full[16*9];\
1032 uint8_t halfHV[64];\
1033 copy_block9(full, src, 16, stride, 9);\
1034 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
1035 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1036 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1037 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1039 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1040 uint8_t full[16*9];\
1042 uint8_t halfHV[64];\
1043 copy_block9(full, src, 16, stride, 9);\
1044 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1045 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1046 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1047 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1049 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1051 uint8_t halfHV[64];\
1052 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1053 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1054 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
1056 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1058 uint8_t halfHV[64];\
1059 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1060 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1061 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1063 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1064 uint8_t full[16*9];\
1067 uint8_t halfHV[64];\
1068 copy_block9(full, src, 16, stride, 9);\
1069 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1070 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1071 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1072 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1074 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1075 uint8_t full[16*9];\
1077 copy_block9(full, src, 16, stride, 9);\
1078 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1079 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1080 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1082 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1083 uint8_t full[16*9];\
1086 uint8_t halfHV[64];\
1087 copy_block9(full, src, 16, stride, 9);\
1088 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1089 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1090 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1091 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1093 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1094 uint8_t full[16*9];\
1096 copy_block9(full, src, 16, stride, 9);\
1097 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1098 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1099 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1101 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1103 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1104 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1107 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1109 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1110 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
1113 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
1114 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
1117 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1119 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1120 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
1123 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1124 uint8_t full[24*17];\
1126 copy_block17(full, src, 24, stride, 17);\
1127 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1128 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
1131 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1132 uint8_t full[24*17];\
1133 copy_block17(full, src, 24, stride, 17);\
1134 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
1137 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1138 uint8_t full[24*17];\
1140 copy_block17(full, src, 24, stride, 17);\
1141 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1142 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
1144 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1145 uint8_t full[24*17];\
1146 uint8_t halfH[272];\
1147 uint8_t halfV[256];\
1148 uint8_t halfHV[256];\
1149 copy_block17(full, src, 24, stride, 17);\
1150 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1151 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1152 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1153 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1155 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1156 uint8_t full[24*17];\
1157 uint8_t halfH[272];\
1158 uint8_t halfHV[256];\
1159 copy_block17(full, src, 24, stride, 17);\
1160 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1161 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1162 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1163 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1165 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1166 uint8_t full[24*17];\
1167 uint8_t halfH[272];\
1168 uint8_t halfV[256];\
1169 uint8_t halfHV[256];\
1170 copy_block17(full, src, 24, stride, 17);\
1171 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1172 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1173 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1174 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1176 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1177 uint8_t full[24*17];\
1178 uint8_t halfH[272];\
1179 uint8_t halfHV[256];\
1180 copy_block17(full, src, 24, stride, 17);\
1181 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1182 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1183 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1184 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1186 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1187 uint8_t full[24*17];\
1188 uint8_t halfH[272];\
1189 uint8_t halfV[256];\
1190 uint8_t halfHV[256];\
1191 copy_block17(full, src, 24, stride, 17);\
1192 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1193 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1194 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1195 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1197 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1198 uint8_t full[24*17];\
1199 uint8_t halfH[272];\
1200 uint8_t halfHV[256];\
1201 copy_block17(full, src, 24, stride, 17);\
1202 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1203 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1204 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1205 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1207 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1208 uint8_t full[24*17];\
1209 uint8_t halfH[272];\
1210 uint8_t halfV[256];\
1211 uint8_t halfHV[256];\
1212 copy_block17(full, src, 24, stride, 17);\
1213 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
1214 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1215 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1216 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1218 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1219 uint8_t full[24*17];\
1220 uint8_t halfH[272];\
1221 uint8_t halfHV[256];\
1222 copy_block17(full, src, 24, stride, 17);\
1223 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1224 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1225 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1226 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1228 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1229 uint8_t halfH[272];\
1230 uint8_t halfHV[256];\
1231 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1232 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1233 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1235 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1236 uint8_t halfH[272];\
1237 uint8_t halfHV[256];\
1238 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1239 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1240 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1242 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1243 uint8_t full[24*17];\
1244 uint8_t halfH[272];\
1245 uint8_t halfV[256];\
1246 uint8_t halfHV[256];\
1247 copy_block17(full, src, 24, stride, 17);\
1248 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1249 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1250 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1251 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1253 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1254 uint8_t full[24*17];\
1255 uint8_t halfH[272];\
1256 copy_block17(full, src, 24, stride, 17);\
1257 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1258 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1259 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1261 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1262 uint8_t full[24*17];\
1263 uint8_t halfH[272];\
1264 uint8_t halfV[256];\
1265 uint8_t halfHV[256];\
1266 copy_block17(full, src, 24, stride, 17);\
1267 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1268 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1269 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1270 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1272 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1273 uint8_t full[24*17];\
1274 uint8_t halfH[272];\
1275 copy_block17(full, src, 24, stride, 17);\
1276 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1277 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1278 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1280 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1281 uint8_t halfH[272];\
1282 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1283 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
/* Store operators plugged into QPEL_MC as its OP parameter.
 * 'b' is a raw lowpass filter output scaled by 32 (the qpel taps
 * 20,-6,3,-1 sum to 32), so (b + bias) >> 5 rescales it; cm is a clip
 * table that clamps the result to 0..255 (expansion expects a
 * uint8_t *cm in scope at the use site).
 * op_put* store the clipped value, op_avg* average it with the pixel
 * already in 'a'; the *_no_rnd variants use bias 15 instead of 16 and
 * drop the +1 in the average, i.e. they round down. */
#define op_avg(a, b)        a = (((a) + cm[((b) + 16) >> 5] + 1) >> 1)
#define op_avg_no_rnd(a, b) a = (((a) + cm[((b) + 15) >> 5]) >> 1)
#define op_put(a, b)        a = cm[((b) + 16) >> 5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15) >> 5]
1293 QPEL_MC(0, avg_ , _ ,
op_avg)
1296 #undef op_avg_no_rnd
1298 #undef op_put_no_rnd
/* Map the zero-fractional-MV (mc00) qpel cases straight to the plain
 * 8x8/16x16 copy/average primitives: with no sub-pel offset there is no
 * filtering, hence no rounding-mode distinction -- which is why the
 * no_rnd variants alias the very same functions as the rounding ones. */
1300 #define put_qpel8_mc00_c ff_put_pixels8x8_c
1301 #define avg_qpel8_mc00_c ff_avg_pixels8x8_c
1302 #define put_qpel16_mc00_c ff_put_pixels16x16_c
1303 #define avg_qpel16_mc00_c ff_avg_pixels16x16_c
1304 #define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
/* FIX: was ff_put_pixels16x16_8_c -- inconsistent with the rnd variant on
 * line 1302 and with the 8x8 no_rnd alias above; the "_8_c" infix belongs
 * to the internal per-bit-depth helpers, not the ff_ wrappers. */
1305 #define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
1312 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1313 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1314 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1315 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1316 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1317 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1318 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1319 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
1325 #if CONFIG_RV40_DECODER
1327 put_pixels16_xy2_8_c(dst, src, stride, 16);
1330 avg_pixels16_xy2_8_c(dst, src, stride, 16);
1333 put_pixels8_xy2_8_c(dst, src, stride, 8);
1336 avg_pixels8_xy2_8_c(dst, src, stride, 8);
1340 #if CONFIG_DIRAC_DECODER
1341 #define DIRAC_MC(OPNAME)\
1342 void ff_ ## OPNAME ## _dirac_pixels8_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1344 OPNAME ## _pixels8_8_c(dst, src[0], stride, h);\
1346 void ff_ ## OPNAME ## _dirac_pixels16_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1348 OPNAME ## _pixels16_8_c(dst, src[0], stride, h);\
1350 void ff_ ## OPNAME ## _dirac_pixels32_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1352 OPNAME ## _pixels16_8_c(dst , src[0] , stride, h);\
1353 OPNAME ## _pixels16_8_c(dst+16, src[0]+16, stride, h);\
1355 void ff_ ## OPNAME ## _dirac_pixels8_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1357 OPNAME ## _pixels8_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
1359 void ff_ ## OPNAME ## _dirac_pixels16_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1361 OPNAME ## _pixels16_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
1363 void ff_ ## OPNAME ## _dirac_pixels32_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1365 OPNAME ## _pixels16_l2_8(dst , src[0] , src[1] , stride, stride, stride, h);\
1366 OPNAME ## _pixels16_l2_8(dst+16, src[0]+16, src[1]+16, stride, stride, stride, h);\
1368 void ff_ ## OPNAME ## _dirac_pixels8_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1370 OPNAME ## _pixels8_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
1372 void ff_ ## OPNAME ## _dirac_pixels16_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1374 OPNAME ## _pixels16_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
1376 void ff_ ## OPNAME ## _dirac_pixels32_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1378 OPNAME ## _pixels16_l4_8(dst , src[0] , src[1] , src[2] , src[3] , stride, stride, stride, stride, stride, h);\
1379 OPNAME ## _pixels16_l4_8(dst+16, src[0]+16, src[1]+16, src[2]+16, src[3]+16, stride, stride, stride, stride, stride, h);\
1390 const int src_1= src[ -srcStride];
1391 const int src0 = src[0 ];
1392 const int src1 = src[ srcStride];
1393 const int src2 = src[2*srcStride];
1394 const int src3 = src[3*srcStride];
1395 const int src4 = src[4*srcStride];
1396 const int src5 = src[5*srcStride];
1397 const int src6 = src[6*srcStride];
1398 const int src7 = src[7*srcStride];
1399 const int src8 = src[8*srcStride];
1400 const int src9 = src[9*srcStride];
1401 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1402 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
1403 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
1404 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
1405 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
1406 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
1407 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
1408 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
1417 put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
1427 put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
1441 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
1450 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
1459 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
1469 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1471 if (d<-2*strength) d1= 0;
1472 else if(d<- strength) d1=-2*strength - d;
1473 else if(d< strength) d1= d;
1474 else if(d< 2*strength) d1= 2*strength - d;
1479 if(p1&256) p1= ~(p1>>31);
1480 if(p2&256) p2= ~(p2>>31);
1487 d2= av_clip((p0-p3)/4, -ad1, ad1);
1489 src[x-2*
stride] = p0 - d2;
1490 src[x+
stride] = p3 + d2;
1496 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
1502 int p0= src[y*stride-2];
1503 int p1= src[y*stride-1];
1504 int p2= src[y*stride+0];
1505 int p3= src[y*stride+1];
1506 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1508 if (d<-2*strength) d1= 0;
1509 else if(d<- strength) d1=-2*strength - d;
1510 else if(d< strength) d1= d;
1511 else if(d< 2*strength) d1= 2*strength - d;
1516 if(p1&256) p1= ~(p1>>31);
1517 if(p2&256) p2= ~(p2>>31);
1519 src[y*stride-1] = p1;
1520 src[y*stride+0] = p2;
1524 d2= av_clip((p0-p3)/4, -ad1, ad1);
1526 src[y*stride-2] = p0 - d2;
1527 src[y*stride+1] = p3 + d2;
1537 temp[x ] = 4*src[x ];
1538 temp[x + 7*8] = 4*src[x + 7*
stride];
1542 xy = y * stride + x;
1544 temp[yz] = src[xy -
stride] + 2*src[xy] + src[xy +
stride];
1549 src[ y*
stride] = (temp[ y*8] + 2)>>2;
1550 src[7+y*
stride] = (temp[7+y*8] + 2)>>2;
1552 xy = y * stride + x;
1554 src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
1565 s += abs(pix1[0] - pix2[0]);
1566 s += abs(pix1[1] - pix2[1]);
1567 s += abs(pix1[2] - pix2[2]);
1568 s += abs(pix1[3] - pix2[3]);
1569 s += abs(pix1[4] - pix2[4]);
1570 s += abs(pix1[5] - pix2[5]);
1571 s += abs(pix1[6] - pix2[6]);
1572 s += abs(pix1[7] - pix2[7]);
1573 s += abs(pix1[8] - pix2[8]);
1574 s += abs(pix1[9] - pix2[9]);
1575 s += abs(pix1[10] - pix2[10]);
1576 s += abs(pix1[11] - pix2[11]);
1577 s += abs(pix1[12] - pix2[12]);
1578 s += abs(pix1[13] - pix2[13]);
1579 s += abs(pix1[14] - pix2[14]);
1580 s += abs(pix1[15] - pix2[15]);
1593 s += abs(pix1[0] -
avg2(pix2[0], pix2[1]));
1594 s += abs(pix1[1] -
avg2(pix2[1], pix2[2]));
1595 s += abs(pix1[2] -
avg2(pix2[2], pix2[3]));
1596 s += abs(pix1[3] -
avg2(pix2[3], pix2[4]));
1597 s += abs(pix1[4] -
avg2(pix2[4], pix2[5]));
1598 s += abs(pix1[5] -
avg2(pix2[5], pix2[6]));
1599 s += abs(pix1[6] -
avg2(pix2[6], pix2[7]));
1600 s += abs(pix1[7] -
avg2(pix2[7], pix2[8]));
1601 s += abs(pix1[8] -
avg2(pix2[8], pix2[9]));
1602 s += abs(pix1[9] -
avg2(pix2[9], pix2[10]));
1603 s += abs(pix1[10] -
avg2(pix2[10], pix2[11]));
1604 s += abs(pix1[11] -
avg2(pix2[11], pix2[12]));
1605 s += abs(pix1[12] -
avg2(pix2[12], pix2[13]));
1606 s += abs(pix1[13] -
avg2(pix2[13], pix2[14]));
1607 s += abs(pix1[14] -
avg2(pix2[14], pix2[15]));
1608 s += abs(pix1[15] -
avg2(pix2[15], pix2[16]));
1618 uint8_t *pix3 = pix2 + line_size;
1622 s += abs(pix1[0] -
avg2(pix2[0], pix3[0]));
1623 s += abs(pix1[1] -
avg2(pix2[1], pix3[1]));
1624 s += abs(pix1[2] -
avg2(pix2[2], pix3[2]));
1625 s += abs(pix1[3] -
avg2(pix2[3], pix3[3]));
1626 s += abs(pix1[4] -
avg2(pix2[4], pix3[4]));
1627 s += abs(pix1[5] -
avg2(pix2[5], pix3[5]));
1628 s += abs(pix1[6] -
avg2(pix2[6], pix3[6]));
1629 s += abs(pix1[7] -
avg2(pix2[7], pix3[7]));
1630 s += abs(pix1[8] -
avg2(pix2[8], pix3[8]));
1631 s += abs(pix1[9] -
avg2(pix2[9], pix3[9]));
1632 s += abs(pix1[10] -
avg2(pix2[10], pix3[10]));
1633 s += abs(pix1[11] -
avg2(pix2[11], pix3[11]));
1634 s += abs(pix1[12] -
avg2(pix2[12], pix3[12]));
1635 s += abs(pix1[13] -
avg2(pix2[13], pix3[13]));
1636 s += abs(pix1[14] -
avg2(pix2[14], pix3[14]));
1637 s += abs(pix1[15] -
avg2(pix2[15], pix3[15]));
1648 uint8_t *pix3 = pix2 + line_size;
1652 s += abs(pix1[0] -
avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1653 s += abs(pix1[1] -
avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1654 s += abs(pix1[2] -
avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1655 s += abs(pix1[3] -
avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1656 s += abs(pix1[4] -
avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1657 s += abs(pix1[5] -
avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1658 s += abs(pix1[6] -
avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1659 s += abs(pix1[7] -
avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1660 s += abs(pix1[8] -
avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
1661 s += abs(pix1[9] -
avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
1662 s += abs(pix1[10] -
avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
1663 s += abs(pix1[11] -
avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
1664 s += abs(pix1[12] -
avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
1665 s += abs(pix1[13] -
avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
1666 s += abs(pix1[14] -
avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
1667 s += abs(pix1[15] -
avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
1681 s += abs(pix1[0] - pix2[0]);
1682 s += abs(pix1[1] - pix2[1]);
1683 s += abs(pix1[2] - pix2[2]);
1684 s += abs(pix1[3] - pix2[3]);
1685 s += abs(pix1[4] - pix2[4]);
1686 s += abs(pix1[5] - pix2[5]);
1687 s += abs(pix1[6] - pix2[6]);
1688 s += abs(pix1[7] - pix2[7]);
1701 s += abs(pix1[0] -
avg2(pix2[0], pix2[1]));
1702 s += abs(pix1[1] -
avg2(pix2[1], pix2[2]));
1703 s += abs(pix1[2] -
avg2(pix2[2], pix2[3]));
1704 s += abs(pix1[3] -
avg2(pix2[3], pix2[4]));
1705 s += abs(pix1[4] -
avg2(pix2[4], pix2[5]));
1706 s += abs(pix1[5] -
avg2(pix2[5], pix2[6]));
1707 s += abs(pix1[6] -
avg2(pix2[6], pix2[7]));
1708 s += abs(pix1[7] -
avg2(pix2[7], pix2[8]));
1718 uint8_t *pix3 = pix2 + line_size;
1722 s += abs(pix1[0] -
avg2(pix2[0], pix3[0]));
1723 s += abs(pix1[1] -
avg2(pix2[1], pix3[1]));
1724 s += abs(pix1[2] -
avg2(pix2[2], pix3[2]));
1725 s += abs(pix1[3] -
avg2(pix2[3], pix3[3]));
1726 s += abs(pix1[4] -
avg2(pix2[4], pix3[4]));
1727 s += abs(pix1[5] -
avg2(pix2[5], pix3[5]));
1728 s += abs(pix1[6] -
avg2(pix2[6], pix3[6]));
1729 s += abs(pix1[7] -
avg2(pix2[7], pix3[7]));
1740 uint8_t *pix3 = pix2 + line_size;
1744 s += abs(pix1[0] -
avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1745 s += abs(pix1[1] -
avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1746 s += abs(pix1[2] -
avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1747 s += abs(pix1[3] -
avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1748 s += abs(pix1[4] -
avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1749 s += abs(pix1[5] -
avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1750 s += abs(pix1[6] -
avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1751 s += abs(pix1[7] -
avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1766 for(x=0; x<16; x++){
1767 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1770 for(x=0; x<15; x++){
1771 score2+=
FFABS( s1[x ] - s1[x +stride]
1772 - s1[x+1] + s1[x+1+stride])
1773 -
FFABS( s2[x ] - s2[x +stride]
1774 - s2[x+1] + s2[x+1+stride]);
1782 else return score1 +
FFABS(score2)*8;
1793 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1797 score2+=
FFABS( s1[x ] - s1[x +stride]
1798 - s1[x+1] + s1[x+1+stride])
1799 -
FFABS( s2[x ] - s2[x +stride]
1800 - s2[x+1] + s2[x+1+stride]);
1808 else return score1 +
FFABS(score2)*8;
1815 for(i=0; i<8*8; i++){
1821 sum += (w*
b)*(w*b)>>4;
1829 for(i=0; i<8*8; i++){
1841 memset(cmp, 0,
sizeof(
void*)*6);
1900 for(i=0; i<=w-
sizeof(long); i+=
sizeof(long)){
1901 long a = *(
long*)(src+i);
1902 long b = *(
long*)(dst+i);
1906 dst[i+0] += src[i+0];
1911 #if !HAVE_FAST_UNALIGNED
1912 if((
long)src2 & (
sizeof(
long)-1)){
1913 for(i=0; i+7<w; i+=8){
1914 dst[i+0] = src1[i+0]-src2[i+0];
1915 dst[i+1] = src1[i+1]-src2[i+1];
1916 dst[i+2] = src1[i+2]-src2[i+2];
1917 dst[i+3] = src1[i+3]-src2[i+3];
1918 dst[i+4] = src1[i+4]-src2[i+4];
1919 dst[i+5] = src1[i+5]-src2[i+5];
1920 dst[i+6] = src1[i+6]-src2[i+6];
1921 dst[i+7] = src1[i+7]-src2[i+7];
1925 for(i=0; i<=w-
sizeof(long); i+=
sizeof(long)){
1926 long a = *(
long*)(src1+i);
1927 long b = *(
long*)(src2+i);
1931 dst[i+0] = src1[i+0]-src2[i+0];
1942 l=
mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
1959 const int pred=
mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
1972 for(i=0; i<w-1; i++){
2029 #define BUTTERFLY2(o1,o2,i1,i2) \
2033 #define BUTTERFLY1(x,y) \
2042 #define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
2053 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
2054 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
2055 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
2056 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
2098 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
2099 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
2100 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
2101 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
2132 sum -=
FFABS(temp[8*0] + temp[8*4]);
2150 const int s07 = SRC(0) + SRC(7);\
2151 const int s16 = SRC(1) + SRC(6);\
2152 const int s25 = SRC(2) + SRC(5);\
2153 const int s34 = SRC(3) + SRC(4);\
2154 const int a0 = s07 + s34;\
2155 const int a1 = s16 + s25;\
2156 const int a2 = s07 - s34;\
2157 const int a3 = s16 - s25;\
2158 const int d07 = SRC(0) - SRC(7);\
2159 const int d16 = SRC(1) - SRC(6);\
2160 const int d25 = SRC(2) - SRC(5);\
2161 const int d34 = SRC(3) - SRC(4);\
2162 const int a4 = d16 + d25 + (d07 + (d07>>1));\
2163 const int a5 = d07 - d34 - (d25 + (d25>>1));\
2164 const int a6 = d07 + d34 - (d16 + (d16>>1));\
2165 const int a7 = d16 - d25 + (d34 + (d34>>1));\
2167 DST(1, a4 + (a7>>2)) ;\
2168 DST(2, a2 + (a3>>1)) ;\
2169 DST(3, a5 + (a6>>2)) ;\
2171 DST(5, a6 - (a5>>2)) ;\
2172 DST(6, (a2>>1) - a3 ) ;\
2173 DST(7, (a4>>2) - a7 ) ;\
2184 #define SRC(x) dct[i][x]
2185 #define DST(x,v) dct[i][x]= v
2186 for( i = 0; i < 8; i++ )
2191 #define
SRC(x) dct[x][i]
2192 #define DST(x,v) sum += FFABS(v)
2193 for( i = 0; i < 8; i++ )
2220 int16_t *
const bak =
temp+64;
2228 memcpy(bak,
temp, 64*
sizeof(int16_t));
2235 sum+= (
temp[i]-bak[i])*(
temp[i]-bak[i]);
2275 for(i=start_i; i<last; i++){
2276 int j= scantable[i];
2281 if((level&(~127)) == 0){
2291 level=
temp[i] + 64;
2295 if((level&(~127)) == 0){
2311 distortion= s->
dsp.
sse[1](
NULL, lsrc2, lsrc1, 8, 8);
2313 return distortion + ((bits*s->
qscale*s->
qscale*109 + 64)>>7);
2346 for(i=start_i; i<last; i++){
2347 int j= scantable[i];
2352 if((level&(~127)) == 0){
2362 level=
temp[i] + 64;
2366 if((level&(~127)) == 0){
2375 #define VSAD_INTRA(size) \
2376 static int vsad_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2380 for(y=1; y<h; y++){ \
2381 for(x=0; x<size; x+=4){ \
2382 score+= FFABS(s[x ] - s[x +stride]) + FFABS(s[x+1] - s[x+1+stride]) \
2383 +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \
2398 for(x=0; x<16; x++){
2399 score+=
FFABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
2408 #define SQ(a) ((a)*(a))
2409 #define VSSE_INTRA(size) \
2410 static int vsse_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2414 for(y=1; y<h; y++){ \
2415 for(x=0; x<size; x+=4){ \
2416 score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride]) \
2417 +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \
2432 for(x=0; x<16; x++){
2433 score+=
SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
2446 for(i=0; i<
size; i++)
2447 score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
2451 #define WRAPPER8_16_SQ(name8, name16)\
2452 static int name16(void *s, uint8_t *dst, uint8_t *src, int stride, int h){\
2454 score +=name8(s, dst , src , stride, 8);\
2455 score +=name8(s, dst+8 , src+8 , stride, 8);\
2459 score +=name8(s, dst , src , stride, 8);\
2460 score +=name8(s, dst+8 , src+8 , stride, 8);\
2477 uint32_t maxi, uint32_t maxisign)
2480 if(a > mini)
return mini;
2481 else if((a^(1
U<<31)) > maxisign)
return maxi;
2487 uint32_t mini = *(uint32_t*)min;
2488 uint32_t maxi = *(uint32_t*)max;
2489 uint32_t maxisign = maxi ^ (1
U<<31);
2490 uint32_t *dsti = (uint32_t*)dst;
2491 const uint32_t *srci = (
const uint32_t*)src;
2492 for(i=0; i<
len; i+=8) {
2493 dsti[i + 0] =
clipf_c_one(srci[i + 0], mini, maxi, maxisign);
2494 dsti[i + 1] =
clipf_c_one(srci[i + 1], mini, maxi, maxisign);
2495 dsti[i + 2] =
clipf_c_one(srci[i + 2], mini, maxi, maxisign);
2496 dsti[i + 3] =
clipf_c_one(srci[i + 3], mini, maxi, maxisign);
2497 dsti[i + 4] =
clipf_c_one(srci[i + 4], mini, maxi, maxisign);
2498 dsti[i + 5] =
clipf_c_one(srci[i + 5], mini, maxi, maxisign);
2499 dsti[i + 6] =
clipf_c_one(srci[i + 6], mini, maxi, maxisign);
2500 dsti[i + 7] =
clipf_c_one(srci[i + 7], mini, maxi, maxisign);
2505 if(min < 0 && max > 0) {
2508 for(i=0; i <
len; i+=8) {
2509 dst[i ] = av_clipf(src[i ], min, max);
2510 dst[i + 1] = av_clipf(src[i + 1], min, max);
2511 dst[i + 2] = av_clipf(src[i + 2], min, max);
2512 dst[i + 3] = av_clipf(src[i + 3], min, max);
2513 dst[i + 4] = av_clipf(src[i + 4], min, max);
2514 dst[i + 5] = av_clipf(src[i + 5], min, max);
2515 dst[i + 6] = av_clipf(src[i + 6], min, max);
2516 dst[i + 7] = av_clipf(src[i + 7], min, max);
2526 res += *v1++ * *v2++;
2536 *v1++ += mul * *v3++;
2542 const int16_t *window,
unsigned int len)
2545 int len2 = len >> 1;
2547 for (i = 0; i < len2; i++) {
2548 int16_t w = window[i];
2549 output[i] = (
MUL16(input[i], w) + (1 << 14)) >> 15;
2550 output[len-i-1] = (
MUL16(input[len-i-1], w) + (1 << 14)) >> 15;
2558 *dst++ = av_clip(*src++, min, max);
2559 *dst++ = av_clip(*src++, min, max);
2560 *dst++ = av_clip(*src++, min, max);
2561 *dst++ = av_clip(*src++, min, max);
2562 *dst++ = av_clip(*src++, min, max);
2563 *dst++ = av_clip(*src++, min, max);
2564 *dst++ = av_clip(*src++, min, max);
2565 *dst++ = av_clip(*src++, min, max);
2605 dest[0] = av_clip_uint8((block[0] + 4)>>3);
2609 dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3));
2617 for(i=0;i<512;i++) {
2625 static int did_fail=0;
2628 if((intptr_t)aligned & 15){
2630 #if HAVE_MMX || HAVE_ALTIVEC
2632 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
2633 "and may be very slow or crash. This is not a bug in libavcodec,\n"
2634 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
2635 "Do not report crashes to FFmpeg developers.\n");
2666 #endif //CONFIG_ENCODERS
2673 }
else if(avctx->
lowres==2){
2678 }
else if(avctx->
lowres==3){
/* Fill one 16-entry quarter-pel motion-compensation sub-table:
 * entry index is (y<<2)|x for the fractional offset (x,y) in 0..3, so
 * slot 0 is mc00 (full-pel), slot 5 is mc11, etc.  PFX selects the
 * operation family (put/avg, rnd/no_rnd), NUM the block size (8 or 16).
 * Invoked with a trailing ';' at the call site (the macro body ends
 * without one). */
2752 #define dspfunc(PFX, IDX, NUM) \
2753 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
2754 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
2755 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
2756 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
2757 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
2758 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
2759 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
2760 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
2761 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
2762 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
2763 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
2764 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
2765 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
2766 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
2767 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
2768 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
2771 dspfunc(put_no_rnd_qpel, 0, 16);
2777 dspfunc(put_no_rnd_qpel, 1, 8);
/* Install a pair of block-compare function pointers on the DSPContext:
 * slot [0] gets the 16-wide variant ("name"16_c), slot [1] the 8x8 one.
 * Relies on the file's naming convention name##16_c / name##8x8_c. */
2793 #define SET_CMP_FUNC(name) \
2794 c->name[0]= name ## 16_c;\
2795 c->name[1]= name ## 8x8_c;
2814 c->
vsad[4]= vsad_intra16_c;
2815 c->
vsad[5]= vsad_intra8_c;
2817 c->
vsse[4]= vsse_intra16_c;
2818 c->
vsse[5]= vsse_intra8_c;
2821 #if CONFIG_SNOW_DECODER || CONFIG_SNOW_ENCODER
2836 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
/* Fill one half-pel pixels_tab row with the four 8-bit variants:
 * [0] copy, [1] horizontal half-pel (x2), [2] vertical half-pel (y2),
 * [3] diagonal half-pel (xy2).  'idx' is pasted verbatim as the array
 * subscript (e.g. "[0]"), 'num' is the block width suffix. */
2859 #define hpel_funcs(prefix, idx, num) \
2860 c->prefix ## _pixels_tab idx [0] = prefix ## _pixels ## num ## _8_c; \
2861 c->prefix ## _pixels_tab idx [1] = prefix ## _pixels ## num ## _x2_8_c; \
2862 c->prefix ## _pixels_tab idx [2] = prefix ## _pixels ## num ## _y2_8_c; \
2863 c->prefix ## _pixels_tab idx [3] = prefix ## _pixels ## num ## _xy2_8_c
/* Token-pasting helpers used by the per-bit-depth dispatch below:
 * FUNC(foo, 8)  -> foo_8        (generic name)
 * FUNCC(foo, 8) -> foo_8_c      (C reference implementation) */
2879 #define FUNC(f, depth) f ## _ ## depth
2880 #define FUNCC(f, depth) f ## _ ## depth ## _c
2882 #define BIT_DEPTH_FUNCS(depth, dct)\
2883 c->get_pixels = FUNCC(get_pixels ## dct , depth);\
2884 c->draw_edges = FUNCC(draw_edges , depth);\
2885 c->clear_block = FUNCC(clear_block ## dct , depth);\
2886 c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\