FFmpeg
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
dct-test.c
Go to the documentation of this file.
1 /*
2  * (c) 2001 Fabrice Bellard
3  * 2007 Marc Hoffman <marc.hoffman@analog.com>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * DCT test (c) 2001 Fabrice Bellard
25  * Started from sample code by Juan J. Sierralta P.
26  */
27 
28 #include "config.h"
29 #include <stdlib.h>
30 #include <stdio.h>
31 #include <string.h>
32 #if HAVE_UNISTD_H
33 #include <unistd.h>
34 #endif
35 #include <math.h>
36 
37 #include "libavutil/cpu.h"
38 #include "libavutil/common.h"
39 #include "libavutil/lfg.h"
40 #include "libavutil/time.h"
41 
42 #include "dct.h"
43 #include "simple_idct.h"
44 #include "aandcttab.h"
45 #include "faandct.h"
46 #include "faanidct.h"
47 #include "x86/idct_xvid.h"
48 #include "dctref.h"
49 
50 #undef printf
51 
/*
 * Prototypes of the architecture-specific transforms referenced by the
 * tables below. Every routine works in place on a block of coefficients.
 */

/* x86 IDCTs, listed in idct_tab as "LIBMPEG2-*" when CONFIG_GPL is set */
void ff_mmx_idct(int16_t *block);
void ff_mmxext_idct(int16_t *block);

/* Blackfin */
void ff_bfin_idct(int16_t *block);
void ff_bfin_fdct(int16_t *block);

/* AltiVec */
void ff_fdct_altivec(int16_t *block);

/* ARM */
void ff_j_rev_dct_arm(int16_t *block);
void ff_simple_idct_arm(int16_t *block);
void ff_simple_idct_armv5te(int16_t *block);
void ff_simple_idct_armv6(int16_t *block);
void ff_simple_idct_neon(int16_t *block);

/* Alpha */
void ff_simple_idct_axp(int16_t *block);
70 
/**
 * Description of one DCT/IDCT implementation under test.
 *
 * Table entries are written positionally in the order
 * { name, func, format, mm_support, nonspec }; trailing members that are
 * omitted default to 0.
 */
struct algo {
    /** Label printed in the report; a NULL name terminates a table. */
    const char *name;
    /** Transform under test, applied in place to a 64-coefficient block. */
    void (*func)(int16_t *block);
    /**
     * Coefficient ordering that permute() applies to the input before
     * func is called. SCALE_PERM leaves the order untouched and instead
     * makes dct_error() rescale the output with the AAN scale factors.
     */
    enum formattag { NO_PERM, MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM,
                     SSE2_PERM, PARTTRANS_PERM, TRANSPOSE_PERM } format;
    /** AV_CPU_FLAG_* bits that must all be set in cpu_flags for the entry to run. */
    int mm_support;
    /** Non-zero: a spec accuracy violation is reported but does not fail the run. */
    int nonspec;
};

/* CPU capability mask that main() compares against algo.mm_support. */
static int cpu_flags;
81 
82 static const struct algo fdct_tab[] = {
83  { "REF-DBL", ff_ref_fdct, NO_PERM },
84  { "FAAN", ff_faandct, NO_PERM },
85  { "IJG-AAN-INT", ff_fdct_ifast, SCALE_PERM },
86  { "IJG-LLM-INT", ff_jpeg_fdct_islow_8, NO_PERM },
87 
88 #if HAVE_MMX_INLINE
89  { "MMX", ff_fdct_mmx, NO_PERM, AV_CPU_FLAG_MMX },
92 #endif
93 
94 #if HAVE_ALTIVEC
95  { "altivecfdct", ff_fdct_altivec, NO_PERM, AV_CPU_FLAG_ALTIVEC },
96 #endif
97 
98 #if ARCH_BFIN
99  { "BFINfdct", ff_bfin_fdct, NO_PERM },
100 #endif
101 
102  { 0 }
103 };
104 
#if ARCH_X86_64 && HAVE_MMX && HAVE_YASM
void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize,
                                int16_t *block, int16_t *qmat);

/**
 * Adapter giving the ProRes "IDCT + put" routine the in-place
 * void (*)(int16_t *) shape used by struct algo.
 *
 * The coefficients are copied into a scratch block, and the routine then
 * writes its output back over the caller's buffer, which is reinterpreted
 * as 16-bit unsigned samples.
 *
 * @param dst 64 coefficients on entry; receives the transform output
 */
static void ff_prores_idct_put_10_sse2_wrap(int16_t *dst){
    DECLARE_ALIGNED(16, static int16_t, qmat)[64];
    DECLARE_ALIGNED(16, static int16_t, tmp)[64];
    int i;

    for(i=0; i<64; i++){
        /* flat quantiser of 4 for every coefficient
         * NOTE(review): presumably chosen to match the reference scaling - confirm */
        qmat[i]=4;
        tmp[i]= dst[i];
    }
    /* The prototype takes uint16_t *, so the int16_t buffer needs an
     * explicit cast; the two types only differ in signedness.
     * 16 is passed as linesize (presumably bytes per row of eight 16-bit
     * samples - confirm against the assembly). */
    ff_prores_idct_put_10_sse2((uint16_t *) dst, 16, tmp, qmat);
}
#endif
121 
122 static const struct algo idct_tab[] = {
123  { "FAANI", ff_faanidct, NO_PERM },
124  { "REF-DBL", ff_ref_idct, NO_PERM },
125  { "INT", ff_j_rev_dct, MMX_PERM },
126  { "SIMPLE-C", ff_simple_idct_8, NO_PERM },
127 
128 #if HAVE_MMX_INLINE
129 #if CONFIG_GPL
130  { "LIBMPEG2-MMX", ff_mmx_idct, MMX_PERM, AV_CPU_FLAG_MMX, 1 },
131  { "LIBMPEG2-MMX2", ff_mmxext_idct, MMX_PERM, AV_CPU_FLAG_MMX2, 1 },
132 #endif
134  { "XVID-MMX", ff_idct_xvid_mmx, NO_PERM, AV_CPU_FLAG_MMX, 1 },
135  { "XVID-MMXEXT", ff_idct_xvid_mmxext, NO_PERM, AV_CPU_FLAG_MMXEXT, 1 },
136  { "XVID-SSE2", ff_idct_xvid_sse2, SSE2_PERM, AV_CPU_FLAG_SSE2, 1 },
137 #if ARCH_X86_64 && HAVE_YASM
138  { "PR-SSE2", ff_prores_idct_put_10_sse2_wrap, TRANSPOSE_PERM, AV_CPU_FLAG_SSE2, 1 },
139 #endif
140 #endif
141 
142 #if ARCH_BFIN
143  { "BFINidct", ff_bfin_idct, NO_PERM },
144 #endif
145 
146 #if ARCH_ARM
147  { "SIMPLE-ARM", ff_simple_idct_arm, NO_PERM },
148  { "INT-ARM", ff_j_rev_dct_arm, MMX_PERM },
149 #endif
150 #if HAVE_ARMV5TE
151  { "SIMPLE-ARMV5TE", ff_simple_idct_armv5te,NO_PERM, AV_CPU_FLAG_ARMV5TE },
152 #endif
153 #if HAVE_ARMV6
154  { "SIMPLE-ARMV6", ff_simple_idct_armv6, MMX_PERM, AV_CPU_FLAG_ARMV6 },
155 #endif
156 #if HAVE_NEON
158 #endif
159 
160 #if ARCH_ALPHA
161  { "SIMPLE-ALPHA", ff_simple_idct_axp, NO_PERM },
162 #endif
163 
164  { 0 }
165 };
166 
/* Right shift applied after multiplying by the AAN rescaling factor. */
#define AANSCALE_BITS 12

/* Number of random blocks per accuracy run. */
#define NB_ITS 20000
/* Number of transforms per timing batch in the speed test. */
#define NB_ITS_SPEED 50000

/* Destination index per source index for MMX_PERM; filled by idct_mmx_init(). */
static short idct_mmx_perm[64];

/* Destination index per source index for MMX_SIMPLE_PERM; read-only. */
static const short idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/* Destination column per source column for SSE2_PERM (rows stay in place). */
static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
186 
187 static void idct_mmx_init(void)
188 {
189  int i;
190 
191  /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
192  for (i = 0; i < 64; i++) {
193  idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
194  }
195 }
196 
197 DECLARE_ALIGNED(16, static int16_t, block)[64];
198 DECLARE_ALIGNED(8, static int16_t, block1)[64];
199 
200 static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng, int vals)
201 {
202  int i, j;
203 
204  memset(block, 0, 64 * sizeof(*block));
205 
206  switch (test) {
207  case 0:
208  for (i = 0; i < 64; i++)
209  block[i] = (av_lfg_get(prng) % (2*vals)) -vals;
210  if (is_idct) {
211  ff_ref_fdct(block);
212  for (i = 0; i < 64; i++)
213  block[i] >>= 3;
214  }
215  break;
216  case 1:
217  j = av_lfg_get(prng) % 10 + 1;
218  for (i = 0; i < j; i++) {
219  int idx = av_lfg_get(prng) % 64;
220  block[idx] = av_lfg_get(prng) % (2*vals) -vals;
221  }
222  break;
223  case 2:
224  block[ 0] = av_lfg_get(prng) % (16*vals) - (8*vals);
225  block[63] = (block[0] & 1) ^ 1;
226  break;
227  }
228 }
229 
230 static void permute(int16_t dst[64], const int16_t src[64], int perm)
231 {
232  int i;
233 
234  if (perm == MMX_PERM) {
235  for (i = 0; i < 64; i++)
236  dst[idct_mmx_perm[i]] = src[i];
237  } else if (perm == MMX_SIMPLE_PERM) {
238  for (i = 0; i < 64; i++)
239  dst[idct_simple_mmx_perm[i]] = src[i];
240  } else if (perm == SSE2_PERM) {
241  for (i = 0; i < 64; i++)
242  dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];
243  } else if (perm == PARTTRANS_PERM) {
244  for (i = 0; i < 64; i++)
245  dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
246  } else if (perm == TRANSPOSE_PERM) {
247  for (i = 0; i < 64; i++)
248  dst[(i>>3) | ((i<<3)&0x38)] = src[i];
249  } else {
250  for (i = 0; i < 64; i++)
251  dst[i] = src[i];
252  }
253 }
254 
255 static int dct_error(const struct algo *dct, int test, int is_idct, int speed, const int bits)
256 {
257  void (*ref)(int16_t *block) = is_idct ? ff_ref_idct : ff_ref_fdct;
258  int it, i, scale;
259  int err_inf, v;
260  int64_t err2, ti, ti1, it1, err_sum = 0;
261  int64_t sysErr[64], sysErrMax = 0;
262  int maxout = 0;
263  int blockSumErrMax = 0, blockSumErr;
264  AVLFG prng;
265  const int vals=1<<bits;
266  double omse, ome;
267  int spec_err;
268 
269  av_lfg_init(&prng, 1);
270 
271  err_inf = 0;
272  err2 = 0;
273  for (i = 0; i < 64; i++)
274  sysErr[i] = 0;
275  for (it = 0; it < NB_ITS; it++) {
276  init_block(block1, test, is_idct, &prng, vals);
277  permute(block, block1, dct->format);
278 
279  dct->func(block);
280  emms_c();
281 
282  if (dct->format == SCALE_PERM) {
283  for (i = 0; i < 64; i++) {
284  scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
285  block[i] = (block[i] * scale) >> AANSCALE_BITS;
286  }
287  }
288 
289  ref(block1);
290 
291  blockSumErr = 0;
292  for (i = 0; i < 64; i++) {
293  int err = block[i] - block1[i];
294  err_sum += err;
295  v = abs(err);
296  if (v > err_inf)
297  err_inf = v;
298  err2 += v * v;
299  sysErr[i] += block[i] - block1[i];
300  blockSumErr += v;
301  if (abs(block[i]) > maxout)
302  maxout = abs(block[i]);
303  }
304  if (blockSumErrMax < blockSumErr)
305  blockSumErrMax = blockSumErr;
306  }
307  for (i = 0; i < 64; i++)
308  sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i]));
309 
310  for (i = 0; i < 64; i++) {
311  if (i % 8 == 0)
312  printf("\n");
313  printf("%7d ", (int) sysErr[i]);
314  }
315  printf("\n");
316 
317  omse = (double) err2 / NB_ITS / 64;
318  ome = (double) err_sum / NB_ITS / 64;
319 
320  spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
321 
322  printf("%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
323  is_idct ? "IDCT" : "DCT", dct->name, err_inf,
324  omse, ome, (double) sysErrMax / NB_ITS,
325  maxout, blockSumErrMax);
326 
327  if (spec_err && !dct->nonspec)
328  return 1;
329 
330  if (!speed)
331  return 0;
332 
333  /* speed test */
334 
335  init_block(block, test, is_idct, &prng, vals);
336  permute(block1, block, dct->format);
337 
338  ti = av_gettime();
339  it1 = 0;
340  do {
341  for (it = 0; it < NB_ITS_SPEED; it++) {
342  memcpy(block, block1, sizeof(block));
343  dct->func(block);
344  }
345  emms_c();
346  it1 += NB_ITS_SPEED;
347  ti1 = av_gettime() - ti;
348  } while (ti1 < 1000000);
349 
350  printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name,
351  (double) it1 * 1000.0 / (double) ti1);
352 
353  return 0;
354 }
355 
358 
359 static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
360 {
361  static int init;
362  static double c8[8][8];
363  static double c4[4][4];
364  double block1[64], block2[64], block3[64];
365  double s, sum, v;
366  int i, j, k;
367 
368  if (!init) {
369  init = 1;
370 
371  for (i = 0; i < 8; i++) {
372  sum = 0;
373  for (j = 0; j < 8; j++) {
374  s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
375  c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
376  sum += c8[i][j] * c8[i][j];
377  }
378  }
379 
380  for (i = 0; i < 4; i++) {
381  sum = 0;
382  for (j = 0; j < 4; j++) {
383  s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
384  c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
385  sum += c4[i][j] * c4[i][j];
386  }
387  }
388  }
389 
390  /* butterfly */
391  s = 0.5 * sqrt(2.0);
392  for (i = 0; i < 4; i++) {
393  for (j = 0; j < 8; j++) {
394  block1[8 * (2 * i) + j] =
395  (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
396  block1[8 * (2 * i + 1) + j] =
397  (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
398  }
399  }
400 
401  /* idct8 on lines */
402  for (i = 0; i < 8; i++) {
403  for (j = 0; j < 8; j++) {
404  sum = 0;
405  for (k = 0; k < 8; k++)
406  sum += c8[k][j] * block1[8 * i + k];
407  block2[8 * i + j] = sum;
408  }
409  }
410 
411  /* idct4 */
412  for (i = 0; i < 8; i++) {
413  for (j = 0; j < 4; j++) {
414  /* top */
415  sum = 0;
416  for (k = 0; k < 4; k++)
417  sum += c4[k][j] * block2[8 * (2 * k) + i];
418  block3[8 * (2 * j) + i] = sum;
419 
420  /* bottom */
421  sum = 0;
422  for (k = 0; k < 4; k++)
423  sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
424  block3[8 * (2 * j + 1) + i] = sum;
425  }
426  }
427 
428  /* clamp and store the result */
429  for (i = 0; i < 8; i++) {
430  for (j = 0; j < 8; j++) {
431  v = block3[8 * i + j];
432  if (v < 0) v = 0;
433  else if (v > 255) v = 255;
434  dest[i * linesize + j] = (int) rint(v);
435  }
436  }
437 }
438 
439 static void idct248_error(const char *name,
440  void (*idct248_put)(uint8_t *dest, int line_size,
441  int16_t *block),
442  int speed)
443 {
444  int it, i, it1, ti, ti1, err_max, v;
445  AVLFG prng;
446 
447  av_lfg_init(&prng, 1);
448 
449  /* just one test to see if code is correct (precision is less
450  important here) */
451  err_max = 0;
452  for (it = 0; it < NB_ITS; it++) {
453  /* XXX: use forward transform to generate values */
454  for (i = 0; i < 64; i++)
455  block1[i] = av_lfg_get(&prng) % 256 - 128;
456  block1[0] += 1024;
457 
458  for (i = 0; i < 64; i++)
459  block[i] = block1[i];
460  idct248_ref(img_dest1, 8, block);
461 
462  for (i = 0; i < 64; i++)
463  block[i] = block1[i];
464  idct248_put(img_dest, 8, block);
465 
466  for (i = 0; i < 64; i++) {
467  v = abs((int) img_dest[i] - (int) img_dest1[i]);
468  if (v == 255)
469  printf("%d %d\n", img_dest[i], img_dest1[i]);
470  if (v > err_max)
471  err_max = v;
472  }
473 #if 0
474  printf("ref=\n");
475  for(i=0;i<8;i++) {
476  int j;
477  for(j=0;j<8;j++) {
478  printf(" %3d", img_dest1[i*8+j]);
479  }
480  printf("\n");
481  }
482 
483  printf("out=\n");
484  for(i=0;i<8;i++) {
485  int j;
486  for(j=0;j<8;j++) {
487  printf(" %3d", img_dest[i*8+j]);
488  }
489  printf("\n");
490  }
491 #endif
492  }
493  printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);
494 
495  if (!speed)
496  return;
497 
498  ti = av_gettime();
499  it1 = 0;
500  do {
501  for (it = 0; it < NB_ITS_SPEED; it++) {
502  for (i = 0; i < 64; i++)
503  block[i] = block1[i];
504  idct248_put(img_dest, 8, block);
505  }
506  emms_c();
507  it1 += NB_ITS_SPEED;
508  ti1 = av_gettime() - ti;
509  } while (ti1 < 1000000);
510 
511  printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
512  (double) it1 * 1000.0 / (double) ti1);
513 }
514 
/** Print the command-line usage summary to stdout. */
static void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>] [<bits>]\n"
        "test-number 0 -> test with random matrixes\n"
        " 1 -> test with random sparse matrixes\n"
        " 2 -> do 3. test from mpeg4 std\n"
        "bits Number of time domain bits to use, 8 is default\n"
        "-i test IDCT implementations\n"
        "-4 test IDCT248 implementations\n"
        "-t speed test\n";

    printf("%s", usage);
}
526 
527 #if !HAVE_GETOPT
528 #include "compat/getopt.c"
529 #endif
530 
531 int main(int argc, char **argv)
532 {
533  int test_idct = 0, test_248_dct = 0;
534  int c, i;
535  int test = 1;
536  int speed = 0;
537  int err = 0;
538  int bits=8;
539 
541 
542  ff_ref_dct_init();
543  idct_mmx_init();
544 
545  for (;;) {
546  c = getopt(argc, argv, "ih4t");
547  if (c == -1)
548  break;
549  switch (c) {
550  case 'i':
551  test_idct = 1;
552  break;
553  case '4':
554  test_248_dct = 1;
555  break;
556  case 't':
557  speed = 1;
558  break;
559  default:
560  case 'h':
561  help();
562  return 0;
563  }
564  }
565 
566  if (optind < argc)
567  test = atoi(argv[optind]);
568  if(optind+1 < argc) bits= atoi(argv[optind+1]);
569 
570  printf("ffmpeg DCT/IDCT test\n");
571 
572  if (test_248_dct) {
573  idct248_error("SIMPLE-C", ff_simple_idct248_put, speed);
574  } else {
575  const struct algo *algos = test_idct ? idct_tab : fdct_tab;
576  for (i = 0; algos[i].name; i++)
577  if (!(~cpu_flags & algos[i].mm_support)) {
578  err |= dct_error(&algos[i], test, test_idct, speed, bits);
579  }
580  }
581 
582  return err;
583 }