FFmpeg
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
dsputil.c
Go to the documentation of this file.
1 /*
2  * DSP utils
3  * Copyright (c) 2000, 2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 /**
26  * @file
27  * DSP utils
28  */
29 
#include <string.h>

#include "libavutil/imgutils.h"
#include "avcodec.h"
#include "dsputil.h"
#include "simple_idct.h"
#include "faandct.h"
#include "faanidct.h"
#include "mathops.h"
#include "mpegvideo.h"
#include "config.h"
#include "vorbis.h"
#include "diracdsp.h"
41 
/* Run-time lookup tables: zero-initialized here and presumably filled during
 * DSP initialization — the init code is not visible in this file section.
 * ff_cropTbl clamps signed intermediates to 0..255 (MAX_NEG_CROP guard bands
 * on both sides); ff_squareTbl is indexed from its midpoint with values in
 * -256..255. */
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
uint32_t ff_squareTbl[512] = {0, };

/* Instantiate the bit-depth template once per supported depth.  `pixeltmp`
 * is the intermediate accumulator type the template uses: 9/10-bit samples
 * fit in int16_t, 12/14-bit need int32_t, and 8-bit uses int16_t again. */
#define pixeltmp int16_t
#define BIT_DEPTH 9
#include "dsputil_template.c"
#undef BIT_DEPTH

#define BIT_DEPTH 10
#include "dsputil_template.c"
#undef BIT_DEPTH

#undef pixeltmp
#define pixeltmp int32_t
#define BIT_DEPTH 12
#include "dsputil_template.c"
#undef BIT_DEPTH

#define BIT_DEPTH 14
#include "dsputil_template.c"
#undef BIT_DEPTH

#undef pixeltmp
#define pixeltmp int16_t
#define BIT_DEPTH 8
#include "dsputil_template.c"
#undef pixeltmp

// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
73 
75  0, 1, 8, 16, 9, 2, 3, 10,
76  17, 24, 32, 25, 18, 11, 4, 5,
77  12, 19, 26, 33, 40, 48, 41, 34,
78  27, 20, 13, 6, 7, 14, 21, 28,
79  35, 42, 49, 56, 57, 50, 43, 36,
80  29, 22, 15, 23, 30, 37, 44, 51,
81  58, 59, 52, 45, 38, 31, 39, 46,
82  53, 60, 61, 54, 47, 55, 62, 63
83 };
84 
85 /* Specific zigzag scan for 248 idct. NOTE that unlike the
86  specification, we interleave the fields */
88  0, 8, 1, 9, 16, 24, 2, 10,
89  17, 25, 32, 40, 48, 56, 33, 41,
90  18, 26, 3, 11, 4, 12, 19, 27,
91  34, 42, 49, 57, 50, 58, 35, 43,
92  20, 28, 5, 13, 6, 14, 21, 29,
93  36, 44, 51, 59, 52, 60, 37, 45,
94  22, 30, 7, 15, 23, 31, 38, 46,
95  53, 61, 54, 62, 39, 47, 55, 63,
96 };
97 
98 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
100 
102  0, 1, 2, 3, 8, 9, 16, 17,
103  10, 11, 4, 5, 6, 7, 15, 14,
104  13, 12, 19, 18, 24, 25, 32, 33,
105  26, 27, 20, 21, 22, 23, 28, 29,
106  30, 31, 34, 35, 40, 41, 48, 49,
107  42, 43, 36, 37, 38, 39, 44, 45,
108  46, 47, 50, 51, 56, 57, 58, 59,
109  52, 53, 54, 55, 60, 61, 62, 63,
110 };
111 
113  0, 8, 16, 24, 1, 9, 2, 10,
114  17, 25, 32, 40, 48, 56, 57, 49,
115  41, 33, 26, 18, 3, 11, 4, 12,
116  19, 27, 34, 42, 50, 58, 35, 43,
117  51, 59, 20, 28, 5, 13, 6, 14,
118  21, 29, 36, 44, 52, 60, 37, 45,
119  53, 61, 22, 30, 7, 15, 23, 31,
120  38, 46, 54, 62, 39, 47, 55, 63,
121 };
122 
/* Input permutation for the simple_idct_mmx */
static const uint8_t simple_mmx_permutation[64]={
        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/* Per-row coefficient reordering applied by the SSE2 IDCT permutation
 * (combined with the row index in ff_init_scantable_permutation below). */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
136 
137 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
138  int i;
139  int end;
140 
141  st->scantable= src_scantable;
142 
143  for(i=0; i<64; i++){
144  int j;
145  j = src_scantable[i];
146  st->permutated[i] = permutation[j];
147  }
148 
149  end=-1;
150  for(i=0; i<64; i++){
151  int j;
152  j = st->permutated[i];
153  if(j>end) end=j;
154  st->raster_end[i]= end;
155  }
156 }
157 
/**
 * Fill idct_permutation[] with the coefficient reordering required by the
 * selected IDCT implementation.
 *
 * NOTE(review): several `case` labels appear to be missing from this listing
 * (for-loops directly follow a `break;` with no label) — presumably lost in
 * extraction; confirm against the complete source before relying on this
 * switch.
 */
void ff_init_scantable_permutation(uint8_t *idct_permutation,
                                   int idct_permutation_type)
{
    int i;

    switch(idct_permutation_type){
    case FF_NO_IDCT_PERM:
        /* identity: coefficients keep their raster position */
        for(i=0; i<64; i++)
            idct_permutation[i]= i;
        break;
        for(i=0; i<64; i++)
            idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
        break;
    case FF_SIMPLE_IDCT_PERM:
        for(i=0; i<64; i++)
            idct_permutation[i]= simple_mmx_permutation[i];
        break;
        /* 8x8 transpose: swap row and column indices */
        for(i=0; i<64; i++)
            idct_permutation[i]= ((i&7)<<3) | (i>>3);
        break;
        for(i=0; i<64; i++)
            idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
        break;
    case FF_SSE2_IDCT_PERM:
        /* keep the row (bits 3..5), reorder columns via idct_sse2_row_perm */
        for(i=0; i<64; i++)
            idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
        break;
    default:
        av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
    }
}
192 
193 static int pix_sum_c(uint8_t * pix, int line_size)
194 {
195  int s, i, j;
196 
197  s = 0;
198  for (i = 0; i < 16; i++) {
199  for (j = 0; j < 16; j += 8) {
200  s += pix[0];
201  s += pix[1];
202  s += pix[2];
203  s += pix[3];
204  s += pix[4];
205  s += pix[5];
206  s += pix[6];
207  s += pix[7];
208  pix += 8;
209  }
210  pix += line_size - 16;
211  }
212  return s;
213 }
214 
215 static int pix_norm1_c(uint8_t * pix, int line_size)
216 {
217  int s, i, j;
218  uint32_t *sq = ff_squareTbl + 256;
219 
220  s = 0;
221  for (i = 0; i < 16; i++) {
222  for (j = 0; j < 16; j += 8) {
223 #if 0
224  s += sq[pix[0]];
225  s += sq[pix[1]];
226  s += sq[pix[2]];
227  s += sq[pix[3]];
228  s += sq[pix[4]];
229  s += sq[pix[5]];
230  s += sq[pix[6]];
231  s += sq[pix[7]];
232 #else
233 #if HAVE_FAST_64BIT
234  register uint64_t x=*(uint64_t*)pix;
235  s += sq[x&0xff];
236  s += sq[(x>>8)&0xff];
237  s += sq[(x>>16)&0xff];
238  s += sq[(x>>24)&0xff];
239  s += sq[(x>>32)&0xff];
240  s += sq[(x>>40)&0xff];
241  s += sq[(x>>48)&0xff];
242  s += sq[(x>>56)&0xff];
243 #else
244  register uint32_t x=*(uint32_t*)pix;
245  s += sq[x&0xff];
246  s += sq[(x>>8)&0xff];
247  s += sq[(x>>16)&0xff];
248  s += sq[(x>>24)&0xff];
249  x=*(uint32_t*)(pix+4);
250  s += sq[x&0xff];
251  s += sq[(x>>8)&0xff];
252  s += sq[(x>>16)&0xff];
253  s += sq[(x>>24)&0xff];
254 #endif
255 #endif
256  pix += 8;
257  }
258  pix += line_size - 16;
259  }
260  return s;
261 }
262 
/**
 * Byte-swap a buffer of 32-bit words (manually unrolled by 8, with a
 * scalar tail loop for the remainder).
 */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int n = 0;

    for (; n + 8 <= w; n += 8) {
        dst[n]     = av_bswap32(src[n]);
        dst[n + 1] = av_bswap32(src[n + 1]);
        dst[n + 2] = av_bswap32(src[n + 2]);
        dst[n + 3] = av_bswap32(src[n + 3]);
        dst[n + 4] = av_bswap32(src[n + 4]);
        dst[n + 5] = av_bswap32(src[n + 5]);
        dst[n + 6] = av_bswap32(src[n + 6]);
        dst[n + 7] = av_bswap32(src[n + 7]);
    }
    /* remaining 0..7 words */
    for (; n < w; n++)
        dst[n] = av_bswap32(src[n]);
}

/**
 * Byte-swap a buffer of 16-bit words.
 */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    int n;
    for (n = 0; n < len; n++)
        dst[n] = av_bswap16(src[n]);
}
286 
287 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
288 {
289  int s, i;
290  uint32_t *sq = ff_squareTbl + 256;
291 
292  s = 0;
293  for (i = 0; i < h; i++) {
294  s += sq[pix1[0] - pix2[0]];
295  s += sq[pix1[1] - pix2[1]];
296  s += sq[pix1[2] - pix2[2]];
297  s += sq[pix1[3] - pix2[3]];
298  pix1 += line_size;
299  pix2 += line_size;
300  }
301  return s;
302 }
303 
304 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
305 {
306  int s, i;
307  uint32_t *sq = ff_squareTbl + 256;
308 
309  s = 0;
310  for (i = 0; i < h; i++) {
311  s += sq[pix1[0] - pix2[0]];
312  s += sq[pix1[1] - pix2[1]];
313  s += sq[pix1[2] - pix2[2]];
314  s += sq[pix1[3] - pix2[3]];
315  s += sq[pix1[4] - pix2[4]];
316  s += sq[pix1[5] - pix2[5]];
317  s += sq[pix1[6] - pix2[6]];
318  s += sq[pix1[7] - pix2[7]];
319  pix1 += line_size;
320  pix2 += line_size;
321  }
322  return s;
323 }
324 
325 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
326 {
327  int s, i;
328  uint32_t *sq = ff_squareTbl + 256;
329 
330  s = 0;
331  for (i = 0; i < h; i++) {
332  s += sq[pix1[ 0] - pix2[ 0]];
333  s += sq[pix1[ 1] - pix2[ 1]];
334  s += sq[pix1[ 2] - pix2[ 2]];
335  s += sq[pix1[ 3] - pix2[ 3]];
336  s += sq[pix1[ 4] - pix2[ 4]];
337  s += sq[pix1[ 5] - pix2[ 5]];
338  s += sq[pix1[ 6] - pix2[ 6]];
339  s += sq[pix1[ 7] - pix2[ 7]];
340  s += sq[pix1[ 8] - pix2[ 8]];
341  s += sq[pix1[ 9] - pix2[ 9]];
342  s += sq[pix1[10] - pix2[10]];
343  s += sq[pix1[11] - pix2[11]];
344  s += sq[pix1[12] - pix2[12]];
345  s += sq[pix1[13] - pix2[13]];
346  s += sq[pix1[14] - pix2[14]];
347  s += sq[pix1[15] - pix2[15]];
348 
349  pix1 += line_size;
350  pix2 += line_size;
351  }
352  return s;
353 }
354 
    const uint8_t *s2, int stride){
    /* NOTE(review): the opening line of this signature (function name and
     * first parameter, presumably the int16_t *block / s1 pair) is missing
     * from this listing — lost in extraction; confirm upstream. */
    int i;

    /* read the pixels: block[] = s1[] - s2[] for one 8x8 block, one
     * unrolled row per iteration */
    for(i=0;i<8;i++) {
        block[0] = s1[0] - s2[0];
        block[1] = s1[1] - s2[1];
        block[2] = s1[2] - s2[2];
        block[3] = s1[3] - s2[3];
        block[4] = s1[4] - s2[4];
        block[5] = s1[5] - s2[5];
        block[6] = s1[6] - s2[6];
        block[7] = s1[7] - s2[7];
        s1 += stride;
        s2 += stride;
        block += 8;
    }
}


    int line_size)
{
    /* NOTE(review): signature opening line missing from this listing —
     * by its body this stores an 8x8 coefficient block into pixels[],
     * clamped to 0..255. */
    int i;

    /* read the pixels */
    for(i=0;i<8;i++) {
        pixels[0] = av_clip_uint8(block[0]);
        pixels[1] = av_clip_uint8(block[1]);
        pixels[2] = av_clip_uint8(block[2]);
        pixels[3] = av_clip_uint8(block[3]);
        pixels[4] = av_clip_uint8(block[4]);
        pixels[5] = av_clip_uint8(block[5]);
        pixels[6] = av_clip_uint8(block[6]);
        pixels[7] = av_clip_uint8(block[7]);

        pixels += line_size;
        block += 8;
    }
}

    int line_size)
{
    /* NOTE(review): signature opening line missing from this listing —
     * 4x4 variant: writes 4 clamped pixels per row for 4 rows, still
     * advancing block by 8 (coefficient rows are 8 wide). */
    int i;

    /* read the pixels */
    for(i=0;i<4;i++) {
        pixels[0] = av_clip_uint8(block[0]);
        pixels[1] = av_clip_uint8(block[1]);
        pixels[2] = av_clip_uint8(block[2]);
        pixels[3] = av_clip_uint8(block[3]);

        pixels += line_size;
        block += 8;
    }
}

    int line_size)
{
    /* NOTE(review): signature opening line missing from this listing —
     * 2x2 variant of the clamped store above. */
    int i;

    /* read the pixels */
    for(i=0;i<2;i++) {
        pixels[0] = av_clip_uint8(block[0]);
        pixels[1] = av_clip_uint8(block[1]);

        pixels += line_size;
        block += 8;
    }
}

    uint8_t *av_restrict pixels,
    int line_size)
{
    /* NOTE(review): signature opening line missing from this listing —
     * stores an 8x8 block of signed coefficients as unsigned pixels by
     * adding the +128 bias, saturating at 0 and 255. */
    int i, j;

    for (i = 0; i < 8; i++) {
        for (j = 0; j < 8; j++) {
            if (*block < -128)
                *pixels = 0;
            else if (*block > 127)
                *pixels = 255;
            else
                *pixels = (uint8_t)(*block + 128);
            block++;
            pixels++;
        }
        /* step to the next output row (8 pixels already advanced) */
        pixels += (line_size - 8);
    }
}

    int line_size)
{
    /* NOTE(review): signature opening line missing from this listing —
     * adds an 8x8 coefficient block onto existing pixels, clamped to
     * 0..255 (the IDCT "add" path). */
    int i;

    /* read the pixels */
    for(i=0;i<8;i++) {
        pixels[0] = av_clip_uint8(pixels[0] + block[0]);
        pixels[1] = av_clip_uint8(pixels[1] + block[1]);
        pixels[2] = av_clip_uint8(pixels[2] + block[2]);
        pixels[3] = av_clip_uint8(pixels[3] + block[3]);
        pixels[4] = av_clip_uint8(pixels[4] + block[4]);
        pixels[5] = av_clip_uint8(pixels[5] + block[5]);
        pixels[6] = av_clip_uint8(pixels[6] + block[6]);
        pixels[7] = av_clip_uint8(pixels[7] + block[7]);
        pixels += line_size;
        block += 8;
    }
}

    int line_size)
{
    /* NOTE(review): signature opening line missing from this listing —
     * 4x4 variant of the clamped add above (block rows remain 8 wide). */
    int i;

    /* read the pixels */
    for(i=0;i<4;i++) {
        pixels[0] = av_clip_uint8(pixels[0] + block[0]);
        pixels[1] = av_clip_uint8(pixels[1] + block[1]);
        pixels[2] = av_clip_uint8(pixels[2] + block[2]);
        pixels[3] = av_clip_uint8(pixels[3] + block[3]);
        pixels += line_size;
        block += 8;
    }
}

    int line_size)
{
    /* NOTE(review): signature opening line missing from this listing —
     * 2x2 variant of the clamped add. */
    int i;

    /* read the pixels */
    for(i=0;i<2;i++) {
        pixels[0] = av_clip_uint8(pixels[0] + block[0]);
        pixels[1] = av_clip_uint8(pixels[1] + block[1]);
        pixels += line_size;
        block += 8;
    }
}

{
    /* NOTE(review): the signature line is missing from this listing — by
     * its body this returns the sum of absolute values of the 64
     * coefficients of a DCT block. */
    int sum=0, i;
    for(i=0; i<64; i++)
        sum+= FFABS(block[i]);
    return sum;
}
507 
/**
 * Fill a 16-pixel-wide block of height h with a constant value.
 *
 * @param block     top-left pixel of the destination
 * @param value     byte written to every pixel
 * @param line_size distance in bytes between the starts of two rows
 * @param h         number of rows
 */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    while (h-- > 0) {
        memset(block, value, 16);
        block += line_size;
    }
}

/**
 * Fill an 8-pixel-wide block of height h with a constant value.
 */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    while (h-- > 0) {
        memset(block, value, 8);
        block += line_size;
    }
}
527 
/* Rounded 2- and 4-way averages used by the pixel ops below.  Arguments are
 * now fully parenthesized so expansions with lower-precedence operators
 * (e.g. avg2(a | b, c)) evaluate correctly — the previous definitions
 * expanded the arguments bare. */
#define avg2(a,b)     (((a) + (b) + 1) >> 1)
#define avg4(a,b,c,d) (((a) + (b) + (c) + (d) + 2) >> 2)
530 
/**
 * One-warp-point global motion compensation: 2D bilinear interpolation
 * with 1/16-pel fractional offsets (x16, y16 in 0..15) over an
 * 8-pixel-wide block of height h.
 *
 * @param rounder constant added before the final >>8
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    /* bilinear weights; A+B+C+D == 256, hence the >>8 below */
    const int A = (16 - x16) * (16 - y16);
    const int B = x16        * (16 - y16);
    const int C = (16 - x16) * y16;
    const int D = x16        * y16;
    int y, x;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 8; x++)
            dst[x] = (A * src[x]          + B * src[x + 1] +
                      C * src[stride + x] + D * src[stride + x + 1] +
                      rounder) >> 8;
        dst += stride;
        src += stride;
    }
}
553 
/**
 * Global motion compensation with a linearly varying motion field
 * (affine warp): for each destination pixel the source coordinate is
 * accumulated from (ox, oy) in steps of (dxx, dyx) per column and
 * (dxy, dyy) per line.  Coordinates carry 16 fractional bits on top of
 * `shift` sub-pel bits (vx>>16 yields an s-scaled position, s = 1<<shift).
 * Source positions outside the clip region interpolate only along the
 * in-range axis, or take the nearest edge pixel when outside on both.
 *
 * @param r rounding constant added before the final >>(shift*2)
 * @param width,height valid source dimensions used for clipping
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    /* turn width/height into the last valid coordinate; the bilinear path
     * additionally reads index+1 / index+stride, so the strict `<` tests
     * below keep those reads in range */
    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){ //XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            /* integer position scaled by s, then split into pel + sub-pel */
            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            /* (unsigned) casts make negative coordinates fail the test too */
            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside: full 2D bilinear interpolation */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*(s-frac_y)
                                       + (  src[index+stride  ]*(s-frac_x)
                                          + src[index+stride+1]*   frac_x )*   frac_y
                                       + r)>>(shift*2);
                }else{
                    /* vertically outside: clamp y, interpolate in x only */
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*s
                                       + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* horizontally outside: clamp x, interpolate in y only */
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( (  src[index       ]*(s-frac_y)
                                          + src[index+stride]*   frac_y )*s
                                       + r)>>(shift*2);
                }else{
                    /* outside on both axes: nearest edge pixel */
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= src[index];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
611 
/**
 * Third-pel MC, no sub-pel offset: plain copy, dispatched on block width.
 * Unsupported widths are silently ignored (as before).
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        put_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        put_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        put_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        put_pixels16_8_c(dst, src, stride, height);
}
620 
/* Third-pel interpolation filters.  683/2731 are fixed-point
 * approximations of 1/3 scaled by 2^11 / 2^15; the added constant before
 * the shift provides rounding.  Naming: mcXY = X third-pels horizontal,
 * Y third-pels vertical. */

static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (683 * (2 * src[x] + src[x + 1] + 1)) >> 11;
}

static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (683 * (src[x] + 2 * src[x + 1] + 1)) >> 11;
}

static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (683 * (2 * src[x] + src[x + stride] + 1)) >> 11;
}

static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (4 * src[x] + 3 * src[x + 1] +
                              3 * src[x + stride] + 2 * src[x + stride + 1] + 6)) >> 15;
}

static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (3 * src[x] + 2 * src[x + 1] +
                              4 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15;
}

static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (683 * (src[x] + 2 * src[x + stride] + 1)) >> 11;
}

static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (3 * src[x] + 4 * src[x + 1] +
                              2 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15;
}

static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (2 * src[x] + 3 * src[x + 1] +
                              3 * src[x + stride] + 4 * src[x + stride + 1] + 6)) >> 15;
}
708 
/**
 * Third-pel MC, no sub-pel offset, averaging variant: dispatch a rounded
 * average with the existing destination on block width.  Unsupported
 * widths are silently ignored (as before).
 */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        avg_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_8_c(dst, src, stride, height);
}
717 
/* Averaging variants of the third-pel filters above: the interpolated
 * value is combined with the existing destination pixel via a rounded
 * average ((dst + filt + 1) >> 1). */

static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683 * (2 * src[x] + src[x + 1] + 1)) >> 11) + 1) >> 1;
}

static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683 * (src[x] + 2 * src[x + 1] + 1)) >> 11) + 1) >> 1;
}

static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683 * (2 * src[x] + src[x + stride] + 1)) >> 11) + 1) >> 1;
}

static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731 * (4 * src[x] + 3 * src[x + 1] +
                                         3 * src[x + stride] + 2 * src[x + stride + 1] + 6)) >> 15) + 1) >> 1;
}

static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731 * (3 * src[x] + 2 * src[x + 1] +
                                         4 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15) + 1) >> 1;
}

static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683 * (src[x] + 2 * src[x + stride] + 1)) >> 11) + 1) >> 1;
}

static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731 * (3 * src[x] + 4 * src[x + 1] +
                                         2 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15) + 1) >> 1;
}

static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731 * (2 * src[x] + 3 * src[x + 1] +
                                         3 * src[x + stride] + 4 * src[x + stride + 1] + 6)) >> 15) + 1) >> 1;
}
805 
806 #define QPEL_MC(r, OPNAME, RND, OP) \
807 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
808  uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
809  int i;\
810  for(i=0; i<h; i++)\
811  {\
812  OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
813  OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
814  OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
815  OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
816  OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
817  OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
818  OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
819  OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
820  dst+=dstStride;\
821  src+=srcStride;\
822  }\
823 }\
824 \
825 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
826  const int w=8;\
827  uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
828  int i;\
829  for(i=0; i<w; i++)\
830  {\
831  const int src0= src[0*srcStride];\
832  const int src1= src[1*srcStride];\
833  const int src2= src[2*srcStride];\
834  const int src3= src[3*srcStride];\
835  const int src4= src[4*srcStride];\
836  const int src5= src[5*srcStride];\
837  const int src6= src[6*srcStride];\
838  const int src7= src[7*srcStride];\
839  const int src8= src[8*srcStride];\
840  OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
841  OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
842  OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
843  OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
844  OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
845  OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
846  OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
847  OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
848  dst++;\
849  src++;\
850  }\
851 }\
852 \
853 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
854  uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
855  int i;\
856  \
857  for(i=0; i<h; i++)\
858  {\
859  OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
860  OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
861  OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
862  OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
863  OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
864  OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
865  OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
866  OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
867  OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
868  OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
869  OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
870  OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
871  OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
872  OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
873  OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
874  OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
875  dst+=dstStride;\
876  src+=srcStride;\
877  }\
878 }\
879 \
880 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
881  uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
882  int i;\
883  const int w=16;\
884  for(i=0; i<w; i++)\
885  {\
886  const int src0= src[0*srcStride];\
887  const int src1= src[1*srcStride];\
888  const int src2= src[2*srcStride];\
889  const int src3= src[3*srcStride];\
890  const int src4= src[4*srcStride];\
891  const int src5= src[5*srcStride];\
892  const int src6= src[6*srcStride];\
893  const int src7= src[7*srcStride];\
894  const int src8= src[8*srcStride];\
895  const int src9= src[9*srcStride];\
896  const int src10= src[10*srcStride];\
897  const int src11= src[11*srcStride];\
898  const int src12= src[12*srcStride];\
899  const int src13= src[13*srcStride];\
900  const int src14= src[14*srcStride];\
901  const int src15= src[15*srcStride];\
902  const int src16= src[16*srcStride];\
903  OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
904  OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
905  OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
906  OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
907  OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
908  OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
909  OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
910  OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
911  OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
912  OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
913  OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
914  OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
915  OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
916  OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
917  OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
918  OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
919  dst++;\
920  src++;\
921  }\
922 }\
923 \
924 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
925  uint8_t half[64];\
926  put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
927  OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
928 }\
929 \
930 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
931  OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
932 }\
933 \
934 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
935  uint8_t half[64];\
936  put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
937  OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
938 }\
939 \
940 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
941  uint8_t full[16*9];\
942  uint8_t half[64];\
943  copy_block9(full, src, 16, stride, 9);\
944  put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
945  OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
946 }\
947 \
948 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
949  uint8_t full[16*9];\
950  copy_block9(full, src, 16, stride, 9);\
951  OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
952 }\
953 \
954 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
955  uint8_t full[16*9];\
956  uint8_t half[64];\
957  copy_block9(full, src, 16, stride, 9);\
958  put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
959  OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
960 }\
961 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
962  uint8_t full[16*9];\
963  uint8_t halfH[72];\
964  uint8_t halfV[64];\
965  uint8_t halfHV[64];\
966  copy_block9(full, src, 16, stride, 9);\
967  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
968  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
969  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
970  OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
971 }\
972 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
973  uint8_t full[16*9];\
974  uint8_t halfH[72];\
975  uint8_t halfHV[64];\
976  copy_block9(full, src, 16, stride, 9);\
977  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
978  put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
979  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
980  OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
981 }\
982 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
983  uint8_t full[16*9];\
984  uint8_t halfH[72];\
985  uint8_t halfV[64];\
986  uint8_t halfHV[64];\
987  copy_block9(full, src, 16, stride, 9);\
988  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
989  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
990  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
991  OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
992 }\
993 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
994  uint8_t full[16*9];\
995  uint8_t halfH[72];\
996  uint8_t halfHV[64];\
997  copy_block9(full, src, 16, stride, 9);\
998  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
999  put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1000  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1001  OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
1002 }\
1003 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1004  uint8_t full[16*9];\
1005  uint8_t halfH[72];\
1006  uint8_t halfV[64];\
1007  uint8_t halfHV[64];\
1008  copy_block9(full, src, 16, stride, 9);\
1009  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1010  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1011  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1012  OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1013 }\
1014 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1015  uint8_t full[16*9];\
1016  uint8_t halfH[72];\
1017  uint8_t halfHV[64];\
1018  copy_block9(full, src, 16, stride, 9);\
1019  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1020  put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1021  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1022  OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1023 }\
1024 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1025  uint8_t full[16*9];\
1026  uint8_t halfH[72];\
1027  uint8_t halfV[64];\
1028  uint8_t halfHV[64];\
1029  copy_block9(full, src, 16, stride, 9);\
1030  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
1031  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1032  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1033  OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1034 }\
1035 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1036  uint8_t full[16*9];\
1037  uint8_t halfH[72];\
1038  uint8_t halfHV[64];\
1039  copy_block9(full, src, 16, stride, 9);\
1040  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1041  put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1042  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1043  OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1044 }\
1045 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1046  uint8_t halfH[72];\
1047  uint8_t halfHV[64];\
1048  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1049  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1050  OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
1051 }\
1052 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1053  uint8_t halfH[72];\
1054  uint8_t halfHV[64];\
1055  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1056  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1057  OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1058 }\
1059 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1060  uint8_t full[16*9];\
1061  uint8_t halfH[72];\
1062  uint8_t halfV[64];\
1063  uint8_t halfHV[64];\
1064  copy_block9(full, src, 16, stride, 9);\
1065  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1066  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1067  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1068  OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1069 }\
1070 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1071  uint8_t full[16*9];\
1072  uint8_t halfH[72];\
1073  copy_block9(full, src, 16, stride, 9);\
1074  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1075  put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1076  OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1077 }\
1078 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1079  uint8_t full[16*9];\
1080  uint8_t halfH[72];\
1081  uint8_t halfV[64];\
1082  uint8_t halfHV[64];\
1083  copy_block9(full, src, 16, stride, 9);\
1084  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1085  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1086  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1087  OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1088 }\
1089 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1090  uint8_t full[16*9];\
1091  uint8_t halfH[72];\
1092  copy_block9(full, src, 16, stride, 9);\
1093  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1094  put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1095  OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1096 }\
1097 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1098  uint8_t halfH[72];\
1099  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1100  OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1101 }\
1102 \
1103 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1104  uint8_t half[256];\
1105  put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1106  OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
1107 }\
1108 \
1109 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
1110  OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
1111 }\
1112 \
1113 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1114  uint8_t half[256];\
1115  put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1116  OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
1117 }\
1118 \
1119 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1120  uint8_t full[24*17];\
1121  uint8_t half[256];\
1122  copy_block17(full, src, 24, stride, 17);\
1123  put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1124  OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
1125 }\
1126 \
1127 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1128  uint8_t full[24*17];\
1129  copy_block17(full, src, 24, stride, 17);\
1130  OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
1131 }\
1132 \
1133 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1134  uint8_t full[24*17];\
1135  uint8_t half[256];\
1136  copy_block17(full, src, 24, stride, 17);\
1137  put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1138  OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
1139 }\
1140 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1141  uint8_t full[24*17];\
1142  uint8_t halfH[272];\
1143  uint8_t halfV[256];\
1144  uint8_t halfHV[256];\
1145  copy_block17(full, src, 24, stride, 17);\
1146  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1147  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1148  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1149  OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1150 }\
1151 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1152  uint8_t full[24*17];\
1153  uint8_t halfH[272];\
1154  uint8_t halfHV[256];\
1155  copy_block17(full, src, 24, stride, 17);\
1156  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1157  put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1158  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1159  OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1160 }\
1161 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1162  uint8_t full[24*17];\
1163  uint8_t halfH[272];\
1164  uint8_t halfV[256];\
1165  uint8_t halfHV[256];\
1166  copy_block17(full, src, 24, stride, 17);\
1167  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1168  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1169  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1170  OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1171 }\
1172 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1173  uint8_t full[24*17];\
1174  uint8_t halfH[272];\
1175  uint8_t halfHV[256];\
1176  copy_block17(full, src, 24, stride, 17);\
1177  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1178  put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1179  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1180  OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1181 }\
1182 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1183  uint8_t full[24*17];\
1184  uint8_t halfH[272];\
1185  uint8_t halfV[256];\
1186  uint8_t halfHV[256];\
1187  copy_block17(full, src, 24, stride, 17);\
1188  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1189  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1190  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1191  OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1192 }\
1193 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1194  uint8_t full[24*17];\
1195  uint8_t halfH[272];\
1196  uint8_t halfHV[256];\
1197  copy_block17(full, src, 24, stride, 17);\
1198  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1199  put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1200  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1201  OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1202 }\
1203 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1204  uint8_t full[24*17];\
1205  uint8_t halfH[272];\
1206  uint8_t halfV[256];\
1207  uint8_t halfHV[256];\
1208  copy_block17(full, src, 24, stride, 17);\
1209  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
1210  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1211  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1212  OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1213 }\
1214 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1215  uint8_t full[24*17];\
1216  uint8_t halfH[272];\
1217  uint8_t halfHV[256];\
1218  copy_block17(full, src, 24, stride, 17);\
1219  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1220  put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1221  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1222  OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1223 }\
1224 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1225  uint8_t halfH[272];\
1226  uint8_t halfHV[256];\
1227  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1228  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1229  OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1230 }\
1231 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1232  uint8_t halfH[272];\
1233  uint8_t halfHV[256];\
1234  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1235  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1236  OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1237 }\
1238 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1239  uint8_t full[24*17];\
1240  uint8_t halfH[272];\
1241  uint8_t halfV[256];\
1242  uint8_t halfHV[256];\
1243  copy_block17(full, src, 24, stride, 17);\
1244  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1245  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1246  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1247  OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1248 }\
1249 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1250  uint8_t full[24*17];\
1251  uint8_t halfH[272];\
1252  copy_block17(full, src, 24, stride, 17);\
1253  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1254  put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1255  OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1256 }\
1257 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1258  uint8_t full[24*17];\
1259  uint8_t halfH[272];\
1260  uint8_t halfV[256];\
1261  uint8_t halfHV[256];\
1262  copy_block17(full, src, 24, stride, 17);\
1263  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1264  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1265  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1266  OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1267 }\
1268 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1269  uint8_t full[24*17];\
1270  uint8_t halfH[272];\
1271  copy_block17(full, src, 24, stride, 17);\
1272  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1273  put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1274  OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1275 }\
1276 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1277  uint8_t halfH[272];\
1278  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1279  OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1280 }
1281 
/* Rounding primitives for the qpel filters above: each op writes one pixel
 * through the clip table `cm` (assumed to be ff_cropTbl + MAX_NEG_CROP inside
 * the enclosing QPEL_MC macro -- TODO confirm).  "(b)+16 >> 5" rounds the
 * 5-bit fixed-point filter sum to nearest; the no_rnd variants add 15 so the
 * result rounds down instead. */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

/* Instantiate the three qpel motion-compensation function families. */
QPEL_MC(0, put_ , _ , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_ , _ , op_avg)
//QPEL_MC(1, avg_no_rnd , _ , op_avg)
#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd

/* Full-pel (mc00) cases need no filtering; alias them to plain block copies.
 * NOTE(review): put_no_rnd_qpel16_mc00_c maps to ff_put_pixels16x16_8_c while
 * every other alias uses an un-suffixed name -- verify this is intentional. */
#define put_qpel8_mc00_c ff_put_pixels8x8_c
#define avg_qpel8_mc00_c ff_avg_pixels8x8_c
#define put_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_8_c
1302 
/**
 * WMV2 horizontal half-pel lowpass filter.
 * For each of h rows, computes 8 output pixels with the 4-tap
 * (-1 9 9 -1)/16 filter: dst[i] = clip((9*(src[i]+src[i+1])
 * - (src[i-1]+src[i+2]) + 8) >> 4).  Reads src[-1]..src[9] on every row.
 *
 * NOTE(review): the declaration of the clip-table pointer `cm` (in FFmpeg
 * normally `uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;`) appears to have been
 * lost in extraction here -- restore it before compiling.
 */
static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
    int i;

    for(i=0; i<h; i++){
        dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
        dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
        dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
        dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
        dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
        dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
        dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
        dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
        dst+=dstStride;
        src+=srcStride;
    }
}
1320 
1321 #if CONFIG_RV40_DECODER
1323  put_pixels16_xy2_8_c(dst, src, stride, 16);
1324 }
1326  avg_pixels16_xy2_8_c(dst, src, stride, 16);
1327 }
1329  put_pixels8_xy2_8_c(dst, src, stride, 8);
1330 }
1332  avg_pixels8_xy2_8_c(dst, src, stride, 8);
1333 }
1334 #endif /* CONFIG_RV40_DECODER */
1335 
1336 #if CONFIG_DIRAC_DECODER
/**
 * Generate the Dirac motion-compensation entry points for one pixel op
 * (instantiated below with OPNAME = put and avg).  Each wrapper forwards to
 * the corresponding 8-bit pixels helper; the 32-wide variants are implemented
 * as two side-by-side 16-wide calls.  _l2 averages two reference blocks,
 * _l4 averages four; only the used entries of src[5] are read.
 */
#define DIRAC_MC(OPNAME)\
void ff_ ## OPNAME ## _dirac_pixels8_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_8_c(dst, src[0], stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_8_c(dst, src[0], stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_8_c(dst   , src[0]   , stride, h);\
    OPNAME ## _pixels16_8_c(dst+16, src[0]+16, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels8_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l2_8(dst   , src[0]   , src[1]   , stride, stride, stride, h);\
    OPNAME ## _pixels16_l2_8(dst+16, src[0]+16, src[1]+16, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels8_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l4_8(dst   , src[0]   , src[1]   , src[2]   , src[3]   , stride, stride, stride, stride, stride, h);\
    OPNAME ## _pixels16_l4_8(dst+16, src[0]+16, src[1]+16, src[2]+16, src[3]+16, stride, stride, stride, stride, stride, h);\
}
1377 DIRAC_MC(put)
1378 DIRAC_MC(avg)
1379 #endif
1380 
/**
 * WMV2 vertical half-pel lowpass filter.
 * For each of w columns, computes 8 output pixels down the column with the
 * same 4-tap (-1 9 9 -1)/16 filter as the horizontal version.
 * Reads src[-srcStride]..src[9*srcStride] in every column.
 *
 * NOTE(review): the declaration of the clip-table pointer `cm` (in FFmpeg
 * normally `uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;`) appears to have been
 * lost in extraction here -- restore it before compiling.
 */
static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
    int i;

    for(i=0; i<w; i++){
        const int src_1= src[ -srcStride];
        const int src0 = src[0 ];
        const int src1 = src[ srcStride];
        const int src2 = src[2*srcStride];
        const int src3 = src[3*srcStride];
        const int src4 = src[4*srcStride];
        const int src5 = src[5*srcStride];
        const int src6 = src[6*srcStride];
        const int src7 = src[7*srcStride];
        const int src8 = src[8*srcStride];
        const int src9 = src[9*srcStride];
        dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
        dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
        dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
        dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
        dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
        dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
        dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
        dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
        src++;
        dst++;
    }
}
1409 
/* Quarter-pel left position: average the source with its horizontally
 * half-pel-filtered version. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hfilt[64];

    wmv2_mspel8_h_lowpass(hfilt, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src, hfilt, stride, stride, 8, 8);
}
1415 
/* Half-pel horizontal position: filter straight into dst, no averaging. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
1419 
/* Quarter-pel right position: average the pixel to the right of the source
 * with the horizontally half-pel-filtered version. */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hfilt[64];

    wmv2_mspel8_h_lowpass(hfilt, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src+1, hfilt, stride, stride, 8, 8);
}
1425 
/* Half-pel vertical position: filter straight into dst, no averaging. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
1429 
/* Quarter-pel left, half-pel down: average the vertically filtered source
 * with the H-then-V filtered version. */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hfilt[88];   /* 8x11: one row above and two rows below the block */
    uint8_t vfilt[64];
    uint8_t hvfilt[64];

    wmv2_mspel8_h_lowpass(hfilt, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(vfilt, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(hvfilt, hfilt + 8, 8, 8, 8);   /* skip extra top row */
    put_pixels8_l2_8(dst, vfilt, hvfilt, stride, 8, 8, 8);
}
/* Quarter-pel right, half-pel down: like mc12 but the vertical-only filter
 * starts one pixel to the right. */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hfilt[88];   /* 8x11: one row above and two rows below the block */
    uint8_t vfilt[64];
    uint8_t hvfilt[64];

    wmv2_mspel8_h_lowpass(hfilt, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(vfilt, src + 1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(hvfilt, hfilt + 8, 8, 8, 8);   /* skip extra top row */
    put_pixels8_l2_8(dst, vfilt, hvfilt, stride, 8, 8, 8);
}
/* Half-pel in both directions: horizontal filter first, then vertical filter
 * straight into dst. */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hfilt[88];   /* 8x11: one row above and two rows below the block */

    wmv2_mspel8_h_lowpass(hfilt, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, hfilt + 8, stride, 8, 8);
}
1453 
/**
 * H.263 deblocking filter for a horizontal block edge (the edge runs
 * horizontally; src points at the first row below it, and rows
 * src[-2*stride]..src[+stride] are modified in place for 8 columns).
 * Filter strength is looked up from the quantizer; presumably this follows
 * H.263 Annex J -- confirm against the spec.
 */
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int x;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(x=0; x<8; x++){
            int d1, d2, ad1;
            int p0= src[x-2*stride];
            int p1= src[x-1*stride];
            int p2= src[x+0*stride];
            int p3= src[x+1*stride];
            /* signed edge-difference measure across the boundary */
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;

            /* piecewise-linear response: full correction for small |d|,
             * ramping back to zero once |d| reaches 2*strength */
            if (d<-2*strength) d1= 0;
            else if(d<- strength) d1=-2*strength - d;
            else if(d< strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else d1= 0;

            p1 += d1;
            p2 -= d1;
            /* branchless-ish clip to 0..255: after +/-d1 the value lies in
             * -256..511, where bit 8 is set exactly when out of range;
             * ~(p>>31) is then 255 for positive overflow, 0 for negative */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[x-1*stride] = p1;
            src[x+0*stride] = p2;

            /* secondary, weaker correction of the outer pixels */
            ad1= FFABS(d1)>>1;

            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[x-2*stride] = p0 - d2;
            src[x+ stride] = p3 + d2;
        }
    }
}
1490 
/**
 * H.263 deblocking filter for a vertical block edge (the edge runs
 * vertically; src points at the first column right of it, and columns
 * src[-2]..src[+1] are modified in place for 8 rows).  Same arithmetic as
 * h263_v_loop_filter_c, transposed.
 */
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int y;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(y=0; y<8; y++){
            int d1, d2, ad1;
            int p0= src[y*stride-2];
            int p1= src[y*stride-1];
            int p2= src[y*stride+0];
            int p3= src[y*stride+1];
            /* signed edge-difference measure across the boundary */
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;

            /* piecewise-linear response: full correction for small |d|,
             * ramping back to zero once |d| reaches 2*strength */
            if (d<-2*strength) d1= 0;
            else if(d<- strength) d1=-2*strength - d;
            else if(d< strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else d1= 0;

            p1 += d1;
            p2 -= d1;
            /* clip to 0..255 via the bit-8 overflow trick (see v filter) */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[y*stride-1] = p1;
            src[y*stride+0] = p2;

            /* secondary, weaker correction of the outer pixels */
            ad1= FFABS(d1)>>1;

            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[y*stride-2] = p0 - d2;
            src[y*stride+1] = p3 + d2;
        }
    }
}
1527 
/**
 * H.261 in-loop filter: separable [1 2 1]/4 smoothing of an 8x8 block,
 * in place.  Border rows/columns are passed through unfiltered.
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int temp[64];
    int row, col;

    /* vertical pass into temp (values scaled by 4); top and bottom rows
     * are copied through with the same x4 scale */
    for (col = 0; col < 8; col++) {
        temp[col]         = 4 * src[col];
        temp[col + 7 * 8] = 4 * src[col + 7 * stride];
    }
    for (row = 1; row < 7; row++) {
        for (col = 0; col < 8; col++) {
            const int s = row * stride + col;
            temp[row * 8 + col] = src[s - stride] + 2 * src[s] + src[s + stride];
        }
    }

    /* horizontal pass back into src with rounding; edge columns only undo
     * the x4 scale */
    for (row = 0; row < 8; row++) {
        src[row * stride]     = (temp[row * 8]     + 2) >> 2;
        src[row * stride + 7] = (temp[row * 8 + 7] + 2) >> 2;
        for (col = 1; col < 7; col++) {
            const int t = row * 8 + col;
            src[row * stride + col] = (temp[t - 1] + 2 * temp[t] + temp[t + 1] + 8) >> 4;
        }
    }
}
1554 
/**
 * Sum of absolute differences over a 16-pixel-wide block, h rows.
 * The first parameter is an unused context pointer (cmp-function ABI).
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
1582 
/**
 * SAD of pix1 against pix2 interpolated at the horizontal half-pel position
 * (rounded average of each pixel and its right neighbour), 16 wide, h rows.
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
1610 
/**
 * SAD of pix1 against pix2 interpolated at the vertical half-pel position
 * (rounded average of each pixel and the one below), 16 wide, h rows.
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, col;
    uint8_t *below = pix2 + line_size;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
1640 
/**
 * SAD of pix1 against pix2 interpolated at the diagonal half-pel position
 * (rounded average of the 2x2 neighbourhood), 16 wide, h rows.
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, col;
    uint8_t *below = pix2 + line_size;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], below[col], below[col + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
1670 
/**
 * Sum of absolute differences over an 8-pixel-wide block, h rows.
 * The first parameter is an unused context pointer (cmp-function ABI).
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
1690 
/**
 * SAD of pix1 against pix2 interpolated at the horizontal half-pel position,
 * 8 wide, h rows.
 */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
1710 
/**
 * SAD of pix1 against pix2 interpolated at the vertical half-pel position,
 * 8 wide, h rows.
 */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, col;
    uint8_t *below = pix2 + line_size;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
1732 
/**
 * SAD of pix1 against pix2 interpolated at the diagonal half-pel position,
 * 8 wide, h rows.
 */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, col;
    uint8_t *below = pix2 + line_size;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], below[col], below[col + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
1754 
1755 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1756  MpegEncContext *c = v;
1757  int score1=0;
1758  int score2=0;
1759  int x,y;
1760 
1761  for(y=0; y<h; y++){
1762  for(x=0; x<16; x++){
1763  score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1764  }
1765  if(y+1<h){
1766  for(x=0; x<15; x++){
1767  score2+= FFABS( s1[x ] - s1[x +stride]
1768  - s1[x+1] + s1[x+1+stride])
1769  -FFABS( s2[x ] - s2[x +stride]
1770  - s2[x+1] + s2[x+1+stride]);
1771  }
1772  }
1773  s1+= stride;
1774  s2+= stride;
1775  }
1776 
1777  if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1778  else return score1 + FFABS(score2)*8;
1779 }
1780 
1781 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1782  MpegEncContext *c = v;
1783  int score1=0;
1784  int score2=0;
1785  int x,y;
1786 
1787  for(y=0; y<h; y++){
1788  for(x=0; x<8; x++){
1789  score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1790  }
1791  if(y+1<h){
1792  for(x=0; x<7; x++){
1793  score2+= FFABS( s1[x ] - s1[x +stride]
1794  - s1[x+1] + s1[x+1+stride])
1795  -FFABS( s2[x ] - s2[x +stride]
1796  - s2[x+1] + s2[x+1+stride]);
1797  }
1798  }
1799  s1+= stride;
1800  s2+= stride;
1801  }
1802 
1803  if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1804  else return score1 + FFABS(score2)*8;
1805 }
1806 
1807 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
1808  int i;
1809  unsigned int sum=0;
1810 
1811  for(i=0; i<8*8; i++){
1812  int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
1813  int w= weight[i];
1814  b>>= RECON_SHIFT;
1815  av_assert2(-512<b && b<512);
1816 
1817  sum += (w*b)*(w*b)>>4;
1818  }
1819  return sum>>2;
1820 }
1821 
1822 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
1823  int i;
1824 
1825  for(i=0; i<8*8; i++){
1826  rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
1827  }
1828 }
1829 
1830 /**
1831  * Permute an 8x8 block.
1832  * @param block the block which will be permuted according to the given permutation vector
1833  * @param permutation the permutation vector
1834  * @param last the last non zero coefficient in scantable order, used to speed the permutation up
1835  * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
1836  * (inverse) permutated to scantable order!
1837  */
1838 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
1839 {
1840  int i;
1841  DCTELEM temp[64];
1842 
1843  if(last<=0) return;
1844  //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
1845 
1846  for(i=0; i<=last; i++){
1847  const int j= scantable[i];
1848  temp[j]= block[j];
1849  block[j]=0;
1850  }
1851 
1852  for(i=0; i<=last; i++){
1853  const int j= scantable[i];
1854  const int perm_j= permutation[j];
1855  block[perm_j]= temp[j];
1856  }
1857 }
1858 
/* Dummy comparison function for FF_CMP_ZERO: always reports a score of 0,
 * regardless of the input blocks. */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}
1862 
1864  int i;
1865 
1866  memset(cmp, 0, sizeof(void*)*6);
1867 
1868  for(i=0; i<6; i++){
1869  switch(type&0xFF){
1870  case FF_CMP_SAD:
1871  cmp[i]= c->sad[i];
1872  break;
1873  case FF_CMP_SATD:
1874  cmp[i]= c->hadamard8_diff[i];
1875  break;
1876  case FF_CMP_SSE:
1877  cmp[i]= c->sse[i];
1878  break;
1879  case FF_CMP_DCT:
1880  cmp[i]= c->dct_sad[i];
1881  break;
1882  case FF_CMP_DCT264:
1883  cmp[i]= c->dct264_sad[i];
1884  break;
1885  case FF_CMP_DCTMAX:
1886  cmp[i]= c->dct_max[i];
1887  break;
1888  case FF_CMP_PSNR:
1889  cmp[i]= c->quant_psnr[i];
1890  break;
1891  case FF_CMP_BIT:
1892  cmp[i]= c->bit[i];
1893  break;
1894  case FF_CMP_RD:
1895  cmp[i]= c->rd[i];
1896  break;
1897  case FF_CMP_VSAD:
1898  cmp[i]= c->vsad[i];
1899  break;
1900  case FF_CMP_VSSE:
1901  cmp[i]= c->vsse[i];
1902  break;
1903  case FF_CMP_ZERO:
1904  cmp[i]= zero_cmp;
1905  break;
1906  case FF_CMP_NSSE:
1907  cmp[i]= c->nsse[i];
1908  break;
1909 #if CONFIG_DWT
1910  case FF_CMP_W53:
1911  cmp[i]= c->w53[i];
1912  break;
1913  case FF_CMP_W97:
1914  cmp[i]= c->w97[i];
1915  break;
1916 #endif
1917  default:
1918  av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
1919  }
1920  }
1921 }
1922 
1923 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
1924  long i;
1925  for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1926  long a = *(long*)(src+i);
1927  long b = *(long*)(dst+i);
1928  *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
1929  }
1930  for(; i<w; i++)
1931  dst[i+0] += src[i+0];
1932 }
1933 
1934 static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w){
1935  long i;
1936 #if !HAVE_FAST_UNALIGNED
1937  if((long)src2 & (sizeof(long)-1)){
1938  for(i=0; i+7<w; i+=8){
1939  dst[i+0] = src1[i+0]-src2[i+0];
1940  dst[i+1] = src1[i+1]-src2[i+1];
1941  dst[i+2] = src1[i+2]-src2[i+2];
1942  dst[i+3] = src1[i+3]-src2[i+3];
1943  dst[i+4] = src1[i+4]-src2[i+4];
1944  dst[i+5] = src1[i+5]-src2[i+5];
1945  dst[i+6] = src1[i+6]-src2[i+6];
1946  dst[i+7] = src1[i+7]-src2[i+7];
1947  }
1948  }else
1949 #endif
1950  for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1951  long a = *(long*)(src1+i);
1952  long b = *(long*)(src2+i);
1953  *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
1954  }
1955  for(; i<w; i++)
1956  dst[i+0] = src1[i+0]-src2[i+0];
1957 }
1958 
/**
 * HuffYUV median-prediction decode: reconstruct a row by adding the stored
 * residuals 'diff' to the median of (left, above, left+above-above_left).
 * The running left/above-left values are carried in/out via *left and
 * *left_top.
 */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
    int i;
    uint8_t cur = *left;       /* reconstructed value to the left */
    uint8_t tl  = *left_top;   /* value above-left */

    for (i = 0; i < w; i++) {
        cur    = mid_pred(cur, src1[i], (cur + src1[i] - tl) & 0xFF) + diff[i];
        tl     = src1[i];
        dst[i] = cur;
    }

    *left     = cur;
    *left_top = tl;
}
1975 
/**
 * HuffYUV median-prediction encode: emit residuals src2 - median(left,
 * above, left+above-above_left). The running left/above-left values are
 * carried in/out via *left and *left_top.
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
    int i;
    uint8_t cur = *left;       /* value to the left in the current row */
    uint8_t tl  = *left_top;   /* value above-left */

    for (i = 0; i < w; i++) {
        const int pred = mid_pred(cur, src1[i], (cur + src1[i] - tl) & 0xFF);
        tl     = src1[i];
        cur    = src2[i];
        dst[i] = cur - pred;
    }

    *left     = cur;
    *left_top = tl;
}
1993 
/**
 * HuffYUV left-prediction decode: running sum of the residuals, stored
 * truncated to 8 bits. Returns the (untruncated) accumulator so the caller
 * can continue across row boundaries.
 */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
    int i;

    for (i = 0; i < w; i++) {
        acc   += src[i];
        dst[i] = acc;       /* implicit modulo-256 truncation */
    }

    return acc;
}
2012 
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/**
 * Left-prediction decode over packed 32-bit BGRA pixels: each channel keeps
 * its own running sum, stored truncated to 8 bits. The running sums are
 * carried in/out via *red/*green/*blue/*alpha.
 */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
    int i;
    int sum_r = *red;
    int sum_g = *green;
    int sum_b = *blue;
    int sum_a = *alpha;

    for (i = 0; i < w; i++) {
        const uint8_t *in  = src + 4 * i;
        uint8_t       *out = dst + 4 * i;

        sum_b += in[B];
        sum_g += in[G];
        sum_r += in[R];
        sum_a += in[A];

        out[B] = sum_b;
        out[G] = sum_g;
        out[R] = sum_r;
        out[A] = sum_a;
    }

    *red   = sum_r;
    *green = sum_g;
    *blue  = sum_b;
    *alpha = sum_a;
}
#undef B
#undef G
#undef R
#undef A
2053 
/* Butterfly writing sum/difference of i1,i2 into two separate outputs. */
#define BUTTERFLY2(o1,o2,i1,i2) \
o1= (i1)+(i2);\
o2= (i1)-(i2);

/* In-place butterfly: x,y <- x+y, x-y. */
#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

/* |x+y| + |x-y| — final butterfly stage folded into the absolute sum. */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))

/**
 * SATD of an 8x8 block: 2-D Hadamard transform of the src-dst difference,
 * then the sum of absolute transform coefficients.
 */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    av_assert2(h==8);

    for(i=0; i<8; i++){
        //FIXME try pointer walks
        /* horizontal 8-point Hadamard on the difference row: stage 1 ... */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        /* ... stage 2 (distance-2 pairs) ... */
        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        /* ... stage 3 (distance-4 pairs). */
        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        /* vertical 8-point Hadamard per column; the last stage is merged
         * into BUTTERFLYA, which also takes the absolute values. */
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
    return sum;
}
2113 
/**
 * Intra SATD of an 8x8 block: 2-D Hadamard transform of the source pixels
 * themselves (no reference), sum of absolute coefficients, with the DC term
 * removed at the end so the score measures only AC energy.
 * 'dummy' is unused; it exists to match the comparison-function signature.
 */
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    av_assert2(h==8);

    for(i=0; i<8; i++){
        //FIXME try pointer walks
        /* horizontal 8-point Hadamard, three butterfly stages */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        /* vertical 8-point Hadamard; last stage folded into BUTTERFLYA */
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    /* temp[8*0] + temp[8*4] is the DC coefficient after the merged final
     * stage; subtract its magnitude so the mean does not bias the score. */
    sum -= FFABS(temp[8*0] + temp[8*4]); // -mean

    return sum;
}
2161 
/**
 * FF_CMP_DCT metric: difference block -> forward DCT -> sum of absolute
 * DCT coefficients.
 */
static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);

    av_assert2(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);
    return s->dsp.sum_abs_dctelem(temp);
}
2172 
#if CONFIG_GPL
/* One 8-point H.264-style integer DCT. SRC/DST are macros redefined by the
 * caller to select row or column access and the output operation. */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}

/**
 * FF_CMP_DCT264 metric: apply the 8-point transform above to the rows of
 * the difference block in place, then to the columns while summing the
 * absolute values of the resulting coefficients.
 */
static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DCTELEM dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
#endif
2225 
2226 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2227  MpegEncContext * const s= (MpegEncContext *)c;
2228  LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2229  int sum=0, i;
2230 
2231  av_assert2(h==8);
2232 
2233  s->dsp.diff_pixels(temp, src1, src2, stride);
2234  s->dsp.fdct(temp);
2235 
2236  for(i=0; i<64; i++)
2237  sum= FFMAX(sum, FFABS(temp[i]));
2238 
2239  return sum;
2240 }
2241 
/**
 * FF_CMP_PSNR metric: squared error introduced by the codec's own
 * quantize -> dequantize -> IDCT round trip on the difference block.
 */
static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
    DCTELEM * const bak = temp+64;    /* untouched copy of the difference DCT input */
    int sum=0, i;

    av_assert2(h==8);
    s->mb_intra=0;    /* force inter-style quantization for this measurement */

    s->dsp.diff_pixels(temp, src1, src2, stride);

    memcpy(bak, temp, 64*sizeof(DCTELEM));

    /* round trip: quantize, dequantize, inverse transform */
    s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
    ff_simple_idct_8(temp); //FIXME

    /* squared error between the round-tripped block and the original copy */
    for(i=0; i<64; i++)
        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);

    return sum;
}
2264 
/**
 * FF_CMP_RD metric: approximate rate-distortion cost of coding the 8x8
 * difference block. The block is quantized, its VLC bit cost counted, then
 * dequantized and reconstructed so the real distortion can be measured.
 * Returns distortion + lambda-scaled rate.
 */
static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
    int i, last, run, bits, level, distortion, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    av_assert2(h==8);

    /* work on local copies so lsrc2 can be overwritten by the reconstruction */
    copy_block8(lsrc1, src1, 8, stride, 8);
    copy_block8(lsrc2, src2, 8, stride, 8);

    s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);

    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1;   /* DC is coded separately for intra blocks */
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* count run/level VLC bits; the last nonzero coefficient uses the
         * dedicated "last" table, coefficients outside [-64,63] the escape
         * length */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;   /* bias so the table index is nonnegative */
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        av_assert2(level - 64);   /* the last coefficient must be nonzero */

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;

    }

    /* reconstruct: dequantize and add the IDCT back onto the prediction */
    if(last>=0){
        if(s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    s->dsp.idct_add(lsrc2, 8, temp);

    distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);

    /* rate weighted by qscale^2 * 109/128 — empirical lambda factor;
     * NOTE(review): exact rationale for 109 not visible here, TODO confirm */
    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
}
2340 
/**
 * FF_CMP_BIT metric: number of VLC bits needed to code the quantized 8x8
 * difference block (rate only, no distortion term).
 */
static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    int i, last, run, bits, level, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    av_assert2(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1;   /* DC is coded separately for intra blocks */
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* count run/level VLC bits; the final nonzero coefficient uses the
         * dedicated "last" table, out-of-range levels the escape length */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;   /* bias so the table index is nonnegative */
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        av_assert2(level - 64);   /* the last coefficient must be nonzero */

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
    }

    return bits;
}
2399 
/* Intra vertical-SAD metric: sum of absolute differences between vertically
 * adjacent pixels inside a single block — a cheap measure of vertical
 * activity. 'dummy' only exists to match the comparison-function signature.
 * The macro instantiates 8- and 16-pixel-wide variants. */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0;                                                                                            \
    int x,y;                                                                                                \
                                                                                                            \
    for(y=1; y<h; y++){                                                                                     \
        for(x=0; x<size; x+=4){                                                                             \
            score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride])                           \
                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]);                          \
        }                                                                                                   \
        s+= stride;                                                                                         \
    }                                                                                                       \
                                                                                                            \
    return score;                                                                                           \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
2417 
/**
 * Vertical SAD of the difference signal: measures how much the residual
 * s1-s2 changes from one row to the next over a 16-wide block.
 */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int total = 0;
    int x, y;

    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x++)
            total += abs(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]);
        s1 += stride;
        s2 += stride;
    }

    return total;
}
2432 
/* Squared value helper for the VSSE metrics below. */
#define SQ(a) ((a)*(a))
/* Intra vertical-SSE metric: sum of squared differences between vertically
 * adjacent pixels inside a single block. 'dummy' only exists to match the
 * comparison-function signature. Instantiated for widths 8 and 16. */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0;                                                                                            \
    int x,y;                                                                                                \
                                                                                                            \
    for(y=1; y<h; y++){                                                                                     \
        for(x=0; x<size; x+=4){                                                                             \
            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])                                 \
                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);                                \
        }                                                                                                   \
        s+= stride;                                                                                         \
    }                                                                                                       \
                                                                                                            \
    return score;                                                                                           \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
2451 
/**
 * Vertical SSE of the difference signal: sum of squared row-to-row changes
 * of the residual s1-s2 over a 16-wide block.
 */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int total = 0;
    int x, y;

    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x++) {
            const int d = s1[x] - s2[x] - s1[x + stride] + s2[x + stride];
            total += d * d;
        }
        s1 += stride;
        s2 += stride;
    }

    return total;
}
2466 
/**
 * Sum of squared differences between an int8 and an int16 array.
 */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int i, sum = 0;

    for (i = 0; i < size; i++) {
        const int d = pix1[i] - pix2[i];
        sum += d * d;
    }
    return sum;
}
2475 
/* Generate the 16x16 variants of the 8x8 comparison functions above via
 * the WRAPPER8_16_SQ macro (defined elsewhere — presumably it scores the
 * four 8x8 quadrants with the 8x8 function and sums the results; TODO
 * confirm against the macro definition). */
WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#if CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
2486 
/**
 * dst[i] = src0[i] * src1[len-1-i], i.e. multiply src0 by src1 read
 * back-to-front.
 */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int i;

    for (i = 0; i < len; i++)
        dst[i] = src0[i] * src1[len - 1 - i];
}
2493 
/**
 * Fused multiply-add over arrays: dst[i] = src0[i] * src1[i] + src2[i].
 */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
    int i;

    for (i = 0; i < len; i++) {
        const float prod = src0[i] * src1[i];
        dst[i] = prod + src2[i];
    }
}
2499 
/**
 * Overlap-add windowing: combines src0 (read forward) and src1 (read
 * backward) under the 2*len-sample window 'win', writing 2*len outputs.
 * All pointers are rebased to the midpoint so one loop can fill both the
 * first and second half symmetrically.
 */
static void vector_fmul_window_c(float *dst, const float *src0,
                                 const float *src1, const float *win, int len)
{
    int neg, pos;

    dst  += len;
    win  += len;
    src0 += len;

    for (neg = -len, pos = len - 1; neg < 0; neg++, pos--) {
        const float a  = src0[neg];
        const float b  = src1[pos];
        const float wa = win[neg];
        const float wb = win[pos];

        dst[neg] = a * wb - b * wa;
        dst[pos] = a * wa + b * wb;
    }
}
2516 
2517 static void butterflies_float_c(float *av_restrict v1, float *av_restrict v2,
2518  int len)
2519 {
2520  int i;
2521  for (i = 0; i < len; i++) {
2522  float t = v1[i] - v2[i];
2523  v1[i] += v2[i];
2524  v2[i] = t;
2525  }
2526 }
2527 
/**
 * Butterfly with interleaved output: dst[2i] = src0[i]+src1[i],
 * dst[2i+1] = src0[i]-src1[i].
 */
static void butterflies_float_interleave_c(float *dst, const float *src0,
                                           const float *src1, int len)
{
    int i;

    for (i = 0; i < len; i++) {
        const float a = src0[i];
        const float b = src1[i];
        dst[2 * i]     = a + b;
        dst[2 * i + 1] = a - b;
    }
}
2539 
/**
 * Dot product of two float vectors of length len.
 */
float ff_scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    float acc = 0.0;

    while (len-- > 0)
        acc += *v1++ * *v2++;

    return acc;
}
2550 
/**
 * Clip one float, handled as its raw 32-bit pattern.
 * Intended for the min < 0 < max case (see vector_clipf_c): negative
 * floats compare greater than all positives when viewed as unsigned, so
 * 'a > mini' catches values below min; flipping the sign bit makes the
 * second compare catch values above max.
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    if (a > mini)
        return mini;
    if ((a ^ (1U << 31)) > maxisign)
        return maxi;
    return a;
}
2559 
/**
 * Clip 'len' floats into [*min, *max] using integer compares on the raw
 * IEEE-754 bit patterns; only valid when *min < 0 < *max (the caller,
 * vector_clipf_c, guarantees this). len is processed in groups of 8.
 * NOTE(review): reinterprets float memory through uint32_t pointers —
 * relies on the build disabling strict-aliasing (FFmpeg convention).
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
    int i, k;
    const uint32_t mini = *(uint32_t*)min;
    const uint32_t maxi = *(uint32_t*)max;
    const uint32_t maxisign = maxi ^ (1U << 31);
    uint32_t *dsti = (uint32_t*)dst;
    const uint32_t *srci = (const uint32_t*)src;

    for (i = 0; i < len; i += 8)
        for (k = 0; k < 8; k++)
            dsti[i + k] = clipf_c_one(srci[i + k], mini, maxi, maxisign);
}
/**
 * Clip 'len' floats into [min, max]; len is processed in groups of 8.
 * When the range spans zero, a faster integer-compare path is used.
 */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
    } else {
        int i, k;
        for (i = 0; i < len; i += 8)
            for (k = 0; k < 8; k++)
                dst[i + k] = av_clipf(src[i + k], min, max);
    }
}
2595 
/**
 * Dot product of two int16 vectors of length 'order'.
 */
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order)
{
    int i;
    int sum = 0;

    for (i = 0; i < order; i++)
        sum += v1[i] * v2[i];

    return sum;
}
2605 
/**
 * Combined dot product and multiply-accumulate:
 * returns sum(v1[i]*v2[i]) computed with the ORIGINAL v1 values, while
 * updating v1[i] += mul * v3[i] in the same pass.
 */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int i;
    int sum = 0;

    for (i = 0; i < order; i++) {
        sum   += v1[i] * v2[i];   /* read before the update below */
        v1[i] += mul * v3[i];
    }
    return sum;
}
2615 
/**
 * Apply a symmetric int16 window: the same coefficient window[i] scales
 * both input[i] and its mirror input[len-1-i], with Q15 rounding.
 * Only the first len/2 window entries are read.
 */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    unsigned int front, back = len - 1;

    for (front = 0; front < len / 2; front++, back--) {
        const int16_t w = window[front];
        output[front] = (MUL16(input[front], w) + (1 << 14)) >> 15;
        output[back]  = (MUL16(input[back],  w) + (1 << 14)) >> 15;
    }
}
2628 
/**
 * Clip 'len' int32 values into [min, max].
 * len is processed in groups of 8; the do/while means at least one group
 * is always processed (matching the original's assumption that len is a
 * positive multiple of 8).
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    do {
        int k;
        for (k = 0; k < 8; k++) {
            const int32_t v = *src++;
            *dst++ = v < min ? min : (v > max ? max : v);
        }
        len -= 8;
    } while (len > 0);
}
2644 
/* Fixed-point cosine coefficients for the WMV2 IDCT, scaled by 2048. */
#define W0 2048
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */

/**
 * One 8-point inverse DCT over a row (contiguous coefficients b[0..7]),
 * in place, with rounding and an 8-bit down-shift at the end.
 */
static void wmv2_idct_row(short * b)
{
    int s1,s2;
    int a0,a1,a2,a3,a4,a5,a6,a7;
    /*step 1*/
    a1 = W1*b[1]+W7*b[7];
    a7 = W7*b[1]-W1*b[7];
    a5 = W5*b[5]+W3*b[3];
    a3 = W3*b[5]-W5*b[3];
    a2 = W2*b[2]+W6*b[6];
    a6 = W6*b[2]-W2*b[6];
    a0 = W0*b[0]+W0*b[4];
    a4 = W0*b[0]-W0*b[4];
    /*step 2*/
    s1 = (181*(a1-a5+a7-a3)+128)>>8;//1,3,5,7,
    s2 = (181*(a1-a5-a7+a3)+128)>>8;
    /*step 3*/
    b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
    b[1] = (a4+a6 +s1   + (1<<7))>>8;
    b[2] = (a4-a6 +s2   + (1<<7))>>8;
    b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
    b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
    b[5] = (a4-a6 -s2   + (1<<7))>>8;
    b[6] = (a4+a6 -s1   + (1<<7))>>8;
    b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
}
/**
 * One 8-point inverse DCT over a column (stride-8 coefficients b[0],
 * b[8], ... b[56]), in place. Inputs are pre-rounded and shifted by 3 for
 * extra precision; the final down-shift is 14 bits.
 */
static void wmv2_idct_col(short * b)
{
    int s1,s2;
    int a0,a1,a2,a3,a4,a5,a6,a7;
    /*step 1, with extended precision*/
    a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
    a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
    a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
    a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
    a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
    a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
    a0 = (W0*b[8*0]+W0*b[8*4]    )>>3;
    a4 = (W0*b[8*0]-W0*b[8*4]    )>>3;
    /*step 2*/
    s1 = (181*(a1-a5+a7-a3)+128)>>8;
    s2 = (181*(a1-a5-a7+a3)+128)>>8;
    /*step 3*/
    b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
    b[8*1] = (a4+a6 +s1   + (1<<13))>>14;
    b[8*2] = (a4-a6 +s2   + (1<<13))>>14;
    b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;

    b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
    b[8*5] = (a4-a6 -s2   + (1<<13))>>14;
    b[8*6] = (a4+a6 -s1   + (1<<13))>>14;
    b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
}
/**
 * In-place 8x8 WMV2 inverse DCT: transform all rows, then all columns.
 */
void ff_wmv2_idct_c(short * block){
    int row, col;

    for (row = 0; row < 8; row++)
        wmv2_idct_row(block + 8 * row);
    for (col = 0; col < 8; col++)
        wmv2_idct_col(block + col);
}
/* XXX: those functions should be suppressed ASAP when all IDCTs are
   converted */
/* Glue wrappers: run an inverse DCT on the block, then either store (put)
 * or accumulate (add) the clamped result into the destination picture. */
static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    put_pixels_clamped_c(block, dest, line_size);
}
static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    add_pixels_clamped_c(block, dest, line_size);
}
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct (block);
    put_pixels_clamped_c(block, dest, line_size);
}
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct (block);
    add_pixels_clamped_c(block, dest, line_size);
}
2739 
/* 4x4 reduced-size jref IDCT wrappers (selected for lowres decoding —
 * see the lowres==1 branch in the init code below). */
static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct4 (block);
    put_pixels_clamped4_c(block, dest, line_size);
}
static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct4 (block);
    add_pixels_clamped4_c(block, dest, line_size);
}
2750 
/* 2x2 reduced-size jref IDCT wrappers (lowres==2 path). */
static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct2 (block);
    put_pixels_clamped2_c(block, dest, line_size);
}
static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct2 (block);
    add_pixels_clamped2_c(block, dest, line_size);
}
2761 
/* 1x1 "IDCT": only the DC coefficient survives; (block[0]+4)>>3 is the
 * rounded de-scaling of the DC term (lowres==3 path). */
static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    dest[0] = av_clip_uint8((block[0] + 4)>>3);
}
static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3));
}
2770 
2771 /* init static data */
2773 {
2774  int i;
2775 
2776  for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
2777  for(i=0;i<MAX_NEG_CROP;i++) {
2778  ff_cropTbl[i] = 0;
2779  ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
2780  }
2781 
2782  for(i=0;i<512;i++) {
2783  ff_squareTbl[i] = (i - 256) * (i - 256);
2784  }
2785 
2786  for(i=0; i<64; i++) ff_inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
2787 }
2788 
2790  static int did_fail=0;
2791  LOCAL_ALIGNED_16(int, aligned, [4]);
2792 
2793  if((intptr_t)aligned & 15){
2794  if(!did_fail){
2795 #if HAVE_MMX || HAVE_ALTIVEC
2797  "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
2798  "and may be very slow or crash. This is not a bug in libavcodec,\n"
2799  "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
2800  "Do not report crashes to FFmpeg developers.\n");
2801 #endif
2802  did_fail=1;
2803  }
2804  return -1;
2805  }
2806  return 0;
2807 }
2808 
2810 {
2811  int i, j;
2812 
2814 
2815 #if CONFIG_ENCODERS
2816  if (avctx->bits_per_raw_sample == 10) {
2819  } else {
2820  if(avctx->dct_algo==FF_DCT_FASTINT) {
2821  c->fdct = ff_fdct_ifast;
2823  }
2824  else if(avctx->dct_algo==FF_DCT_FAAN) {
2825  c->fdct = ff_faandct;
2826  c->fdct248 = ff_faandct248;
2827  }
2828  else {
2829  c->fdct = ff_jpeg_fdct_islow_8; //slow/accurate/default
2831  }
2832  }
2833 #endif //CONFIG_ENCODERS
2834 
2835  if(avctx->lowres==1){
2838  c->idct = ff_j_rev_dct4;
2840  }else if(avctx->lowres==2){
2843  c->idct = ff_j_rev_dct2;
2845  }else if(avctx->lowres==3){
2848  c->idct = ff_j_rev_dct1;
2850  }else{
2851  if (avctx->bits_per_raw_sample == 10) {
2854  c->idct = ff_simple_idct_10;
2856  } else {
2857  if(avctx->idct_algo==FF_IDCT_INT){
2860  c->idct = ff_j_rev_dct;
2862  }else if(avctx->idct_algo==FF_IDCT_WMV2){
2865  c->idct = ff_wmv2_idct_c;
2867  }else if(avctx->idct_algo==FF_IDCT_FAAN){
2870  c->idct = ff_faanidct;
2872  }else{ //accurate/default
2875  c->idct = ff_simple_idct_8;
2877  }
2878  }
2879  }
2880 
2886  c->gmc1 = gmc1_c;
2887  c->gmc = ff_gmc_c;
2888  c->pix_sum = pix_sum_c;
2889  c->pix_norm1 = pix_norm1_c;
2890 
2892  c->fill_block_tab[1] = fill_block8_c;
2893 
2894  /* TODO [0] 16 [1] 8 */
2895  c->pix_abs[0][0] = pix_abs16_c;
2896  c->pix_abs[0][1] = pix_abs16_x2_c;
2897  c->pix_abs[0][2] = pix_abs16_y2_c;
2898  c->pix_abs[0][3] = pix_abs16_xy2_c;
2899  c->pix_abs[1][0] = pix_abs8_c;
2900  c->pix_abs[1][1] = pix_abs8_x2_c;
2901  c->pix_abs[1][2] = pix_abs8_y2_c;
2902  c->pix_abs[1][3] = pix_abs8_xy2_c;
2903 
2913 
2923 
      /* Fill one 16-entry quarter-pel motion-compensation table: entry
       * [4*y + x] is the PFX##NUM##_mc<x><y>_c function for fractional
       * position (x, y), as the assignments below lay out.
       * (No comments inside the macro: the lines are \-continued.) */
2924 #define dspfunc(PFX, IDX, NUM) \
2925  c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
2926  c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
2927  c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
2928  c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
2929  c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
2930  c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
2931  c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
2932  c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
2933  c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
2934  c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
2935  c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
2936  c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
2937  c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
2938  c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
2939  c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
2940  c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
2941 
      /* Populate the qpel tables: IDX 0 = 16x16 blocks, IDX 1 = 8x8. */
2942  dspfunc(put_qpel, 0, 16);
2943  dspfunc(put_no_rnd_qpel, 0, 16);
2944 
2945  dspfunc(avg_qpel, 0, 16);
2946  /* dspfunc(avg_no_rnd_qpel, 0, 16); */
2947 
2948  dspfunc(put_qpel, 1, 8);
2949  dspfunc(put_no_rnd_qpel, 1, 8);
2950 
2951  dspfunc(avg_qpel, 1, 8);
2952  /* dspfunc(avg_no_rnd_qpel, 1, 8); */
2953 
2954 #undef dspfunc
2955 
2955 
2964 
      /* Helper: set both sizes of a comparison-function pair —
       * slot [0] = 16x16 variant, slot [1] = 8x8 variant. */
2965 #define SET_CMP_FUNC(name) \
2966  c->name[0]= name ## 16_c;\
2967  c->name[1]= name ## 8x8_c;
2968 
2969  SET_CMP_FUNC(hadamard8_diff)
2970  c->hadamard8_diff[4]= hadamard8_intra16_c;
      /* NOTE(review): line 2971 is elided from this listing — verify its
       * contents against the full source. */
2972  SET_CMP_FUNC(dct_sad)
2973  SET_CMP_FUNC(dct_max)
2974 #if CONFIG_GPL
2975  SET_CMP_FUNC(dct264_sad)
2976 #endif
2977  c->sad[0]= pix_abs16_c;
2978  c->sad[1]= pix_abs8_c;
2979  c->sse[0]= sse16_c;
2980  c->sse[1]= sse8_c;
2981  c->sse[2]= sse4_c;
2982  SET_CMP_FUNC(quant_psnr)
2983  SET_CMP_FUNC(rd)
2984  SET_CMP_FUNC(bit)
      /* Slots [4]/[5] hold the intra (no-reference) 16x16/8x8 variants. */
2985  c->vsad[0]= vsad16_c;
2986  c->vsad[4]= vsad_intra16_c;
2987  c->vsad[5]= vsad_intra8_c;
2988  c->vsse[0]= vsse16_c;
2989  c->vsse[4]= vsse_intra16_c;
2990  c->vsse[5]= vsse_intra8_c;
2991  c->nsse[0]= nsse16_c;
2992  c->nsse[1]= nsse8_c;
2993 #if CONFIG_DWT
      /* NOTE(review): line 2994 (DWT-conditional setup) is elided from this
       * listing — confirm against the full source. */
2995 #endif
2996 
2998 
2999  c->add_bytes= add_bytes_c;
      /* NOTE(review): lines 3000-3004 are elided from this listing. */
3005  c->bswap_buf= bswap_buf;
3006  c->bswap16_buf = bswap16_buf;
3007 
      /* Body elided from this listing — presumably H.263-specific hooks;
       * verify against the full source. */
3008  if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
3011  }
3012 
3014 
3017 
3018 #if CONFIG_VORBIS_DECODER
3020 #endif
3032 
      /* Plane shrinkers: [0] is a plain plane copy; higher indices halve
       * each dimension progressively (shrink22/shrink44/shrink88 suggest
       * /2, /4, /8 — confirm in the template sources). */
3033  c->shrink[0]= av_image_copy_plane;
3034  c->shrink[1]= ff_shrink22;
3035  c->shrink[2]= ff_shrink44;
3036  c->shrink[3]= ff_shrink88;
3037 
      /* Zero the 2-tap qpel tables so arch-specific inits can override
       * entries; the loop near the end of this function fills any slot
       * still NULL with the matching h264 qpel function. */
3038  memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
3039  memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
3040 
3041 #undef FUNC
3042 #undef FUNCC
      /* Name mangling for the bit-depth templates included at the top of
       * the file: FUNC(f, d) -> f_<d>, FUNCC(f, d) -> f_<d>_c. */
3043 #define FUNC(f, depth) f ## _ ## depth
3044 #define FUNCC(f, depth) f ## _ ## depth ## _c
3045 
      /* Fill the 4-entry half-pel pixels table for one block size:
       * [0] full-pel, [1] x half-pel, [2] y half-pel, [3] xy half-pel. */
3046 #define dspfunc1(PFX, IDX, NUM, depth)\
3047  c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM , depth);\
3048  c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
3049  c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
3050  c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
3051 
      /* Fill one 16-entry quarter-pel table for the given bit depth:
       * entry [4*y + x] is the _mc<x><y> function for position (x, y). */
3052 #define dspfunc2(PFX, IDX, NUM, depth)\
3053  c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
3054  c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
3055  c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
3056  c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
3057  c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
3058  c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
3059  c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
3060  c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
3061  c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
3062  c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
3063  c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
3064  c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
3065  c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
3066  c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
3067  c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
3068  c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
3069 
3070 
      /* Install every bit-depth-templated function pointer for the given
       * depth. `dct` is the _16/_32 suffix selecting the DCT-coefficient
       * word size for the functions that depend on it (see the
       * c->dct_bits test in the switch below). */
3071 #define BIT_DEPTH_FUNCS(depth, dct)\
3072  c->get_pixels = FUNCC(get_pixels ## dct , depth);\
3073  c->draw_edges = FUNCC(draw_edges , depth);\
3074  c->clear_block = FUNCC(clear_block ## dct , depth);\
3075  c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
3076  c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
3077  c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
3078  c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\
3079  c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\
3080 \
3081  c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\
3082  c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\
3083  c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\
3084  c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\
3085  c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\
3086  c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\
3087 \
3088  dspfunc1(put , 0, 16, depth);\
3089  dspfunc1(put , 1, 8, depth);\
3090  dspfunc1(put , 2, 4, depth);\
3091  dspfunc1(put , 3, 2, depth);\
3092  dspfunc1(put_no_rnd, 0, 16, depth);\
3093  dspfunc1(put_no_rnd, 1, 8, depth);\
3094  dspfunc1(avg , 0, 16, depth);\
3095  dspfunc1(avg , 1, 8, depth);\
3096  dspfunc1(avg , 2, 4, depth);\
3097  dspfunc1(avg , 3, 2, depth);\
3098  dspfunc1(avg_no_rnd, 0, 16, depth);\
3099  dspfunc1(avg_no_rnd, 1, 8, depth);\
3100 \
3101  dspfunc2(put_h264_qpel, 0, 16, depth);\
3102  dspfunc2(put_h264_qpel, 1, 8, depth);\
3103  dspfunc2(put_h264_qpel, 2, 4, depth);\
3104  dspfunc2(put_h264_qpel, 3, 2, depth);\
3105  dspfunc2(avg_h264_qpel, 0, 16, depth);\
3106  dspfunc2(avg_h264_qpel, 1, 8, depth);\
3107  dspfunc2(avg_h264_qpel, 2, 4, depth);
3108 
      /* Pick the template instantiation matching the stream's bit depth;
       * c->dct_bits chooses between the 16- and 32-bit DCT-coefficient
       * variants for 9/10/12/14-bit content. */
3109  switch (avctx->bits_per_raw_sample) {
3110  case 9:
3111  if (c->dct_bits == 32) {
3112  BIT_DEPTH_FUNCS(9, _32);
3113  } else {
3114  BIT_DEPTH_FUNCS(9, _16);
3115  }
3116  break;
3117  case 10:
3118  if (c->dct_bits == 32) {
3119  BIT_DEPTH_FUNCS(10, _32);
3120  } else {
3121  BIT_DEPTH_FUNCS(10, _16);
3122  }
3123  break;
3124  case 12:
3125  if (c->dct_bits == 32) {
3126  BIT_DEPTH_FUNCS(12, _32);
3127  } else {
3128  BIT_DEPTH_FUNCS(12, _16);
3129  }
3130  break;
3131  case 14:
3132  if (c->dct_bits == 32) {
3133  BIT_DEPTH_FUNCS(14, _32);
3134  } else {
3135  BIT_DEPTH_FUNCS(14, _16);
3136  }
3137  break;
3138  default:
      /* Unsupported high depths for video get no templated functions here;
       * <= 8-bit (or non-video) uses the 8-bit set with 16-bit DCT. */
3139  if(avctx->bits_per_raw_sample<=8 || avctx->codec_type != AVMEDIA_TYPE_VIDEO) {
3140  BIT_DEPTH_FUNCS(8, _16);
3141  }
3142  break;
3143  }
3144 
3145 
3145 
      /* Let platform-specific implementations override the C functions
       * installed above (each init only replaces what it accelerates). */
3146  if (HAVE_MMX) ff_dsputil_init_mmx (c, avctx);
3147  if (ARCH_ARM) ff_dsputil_init_arm (c, avctx);
3148  if (HAVE_VIS) ff_dsputil_init_vis (c, avctx);
3149  if (ARCH_ALPHA) ff_dsputil_init_alpha (c, avctx);
3150  if (ARCH_PPC) ff_dsputil_init_ppc (c, avctx);
3151  if (ARCH_SH4) ff_dsputil_init_sh4 (c, avctx);
3152  if (ARCH_BFIN) ff_dsputil_init_bfin (c, avctx);
3153  if (HAVE_MIPSFPU) ff_dsputil_init_mips (c, avctx);
3154 
      /* Any 2-tap qpel slot the arch inits left NULL (they were zeroed
       * earlier) falls back to the corresponding h264 qpel function. */
3155  for (i = 0; i < 4; i++) {
3156  for (j = 0; j < 16; j++) {
3157  if(!c->put_2tap_qpel_pixels_tab[i][j])
3158  c->put_2tap_qpel_pixels_tab[i][j] =
3159  c->put_h264_qpel_pixels_tab[i][j];
3160  if(!c->avg_2tap_qpel_pixels_tab[i][j])
3161  c->avg_2tap_qpel_pixels_tab[i][j] =
3162  c->avg_h264_qpel_pixels_tab[i][j];
3163  }
3164  }
3165 
      /* NOTE(review): lines 3166-3167 are elided from this listing —
       * verify against the full source before relying on this tail. */
3168 }
3169 
      /* NOTE(review): the signature on line 3170 is not visible in this
       * listing; the body only forwards to ff_dsputil_init — presumably a
       * legacy/public alias. Confirm against the full source. */
3171 {
3172  ff_dsputil_init(c, avctx);
3173 }