FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
dct-test.c
Go to the documentation of this file.
1 /*
2  * (c) 2001 Fabrice Bellard
3  * 2007 Marc Hoffman <marc.hoffman@analog.com>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * DCT test (c) 2001 Fabrice Bellard
25  * Started from sample code by Juan J. Sierralta P.
26  */
27 
28 #include "config.h"
29 #include <stdlib.h>
30 #include <stdio.h>
31 #include <string.h>
32 #if HAVE_UNISTD_H
33 #include <unistd.h>
34 #endif
35 #include <math.h>
36 
37 #include "libavutil/cpu.h"
38 #include "libavutil/common.h"
39 #include "libavutil/lfg.h"
40 #include "libavutil/time.h"
41 
42 #include "dct.h"
43 #include "simple_idct.h"
44 #include "aandcttab.h"
45 #include "faandct.h"
46 #include "faanidct.h"
47 #include "x86/idct_xvid.h"
48 #include "dctref.h"
49 
50 #undef printf
51 
/*
 * Prototypes of architecture-specific transforms referenced by the test
 * tables in this file. All of them take a single pointer to int16_t.
 */

/* BFIN */
void ff_bfin_idct(int16_t *block);
void ff_bfin_fdct(int16_t *block);

/* ALTIVEC */
void ff_fdct_altivec(int16_t *block);

/* ARM */
void ff_j_rev_dct_arm(int16_t *data);
void ff_simple_idct_arm(int16_t *data);
void ff_simple_idct_armv5te(int16_t *data);
void ff_simple_idct_armv6(int16_t *data);
void ff_simple_idct_neon(int16_t *data);

/* listed in idct_tab under ARCH_ALPHA */
void ff_simple_idct_axp(int16_t *data);
67 
/**
 * Description of one transform implementation under test.
 *
 * Table rows initialise the members positionally, in the order below.
 */
struct algo {
    /** Label printed with the results; a NULL name terminates a table. */
    const char *name;
    /** Transform operating in place on a 64-coefficient block. */
    void (*func)(int16_t *block);
    /**
     * Reordering applied to the input coefficients before func is called.
     * SCALE_PERM additionally makes dct_error() rescale the output with
     * ff_aanscales.
     */
    enum formattag { NO_PERM, MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM,
                     SSE2_PERM, PARTTRANS_PERM, TRANSPOSE_PERM } format;
    /** AV_CPU_FLAG_* bits that must all be present in cpu_flags; 0 if none. */
    int mm_support;
    /** Non-zero: exceeding the accuracy limits is not reported as an error. */
    int nonspec;
};

/* CPU capability bits; main() skips entries whose mm_support is not covered. */
static int cpu_flags;
78 
79 static const struct algo fdct_tab[] = {
80  { "REF-DBL", ff_ref_fdct, NO_PERM },
81  { "FAAN", ff_faandct, NO_PERM },
82  { "IJG-AAN-INT", ff_fdct_ifast, SCALE_PERM },
83  { "IJG-LLM-INT", ff_jpeg_fdct_islow_8, NO_PERM },
84 
85 #if HAVE_MMX_INLINE
86  { "MMX", ff_fdct_mmx, NO_PERM, AV_CPU_FLAG_MMX },
87 #endif
88 #if HAVE_MMXEXT_INLINE
90 #endif
91 #if HAVE_SSE2_INLINE
93 #endif
94 
95 #if HAVE_ALTIVEC
96  { "altivecfdct", ff_fdct_altivec, NO_PERM, AV_CPU_FLAG_ALTIVEC },
97 #endif
98 
99 #if ARCH_BFIN
100  { "BFINfdct", ff_bfin_fdct, NO_PERM },
101 #endif
102 
103  { 0 }
104 };
105 
#if ARCH_X86_64 && HAVE_MMX && HAVE_YASM
void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize,
                                int16_t *block, int16_t *qmat);

/**
 * Adapt ff_prores_idct_put_10_sse2() to the in-place void (*)(int16_t *)
 * signature used by the test tables.
 *
 * The 64 input values are copied into a scratch buffer, every entry of the
 * quantiser matrix is set to 4, and the transform then writes its output
 * back into dst with a line size of 16.
 *
 * @param dst 64 coefficients on entry, overwritten with the output
 */
static void ff_prores_idct_put_10_sse2_wrap(int16_t *dst)
{
    DECLARE_ALIGNED(16, static int16_t, qmat)[64];
    DECLARE_ALIGNED(16, static int16_t, tmp)[64];
    int i;

    for (i = 0; i < 64; i++) {
        qmat[i] = 4;
        tmp[i]  = dst[i];
    }

    /* The destination parameter is uint16_t *, which int16_t * does not
     * convert to implicitly; make the conversion explicit. */
    ff_prores_idct_put_10_sse2((uint16_t *) dst, 16, tmp, qmat);
}
#endif
122 
123 static const struct algo idct_tab[] = {
124  { "FAANI", ff_faanidct, NO_PERM },
125  { "REF-DBL", ff_ref_idct, NO_PERM },
126  { "INT", ff_j_rev_dct, MMX_PERM },
127  { "SIMPLE-C", ff_simple_idct_8, NO_PERM },
128 
129 #if HAVE_MMX_INLINE
131  { "XVID-MMX", ff_idct_xvid_mmx, NO_PERM, AV_CPU_FLAG_MMX, 1 },
132 #endif
133 #if HAVE_MMXEXT_INLINE
134  { "XVID-MMXEXT", ff_idct_xvid_mmxext, NO_PERM, AV_CPU_FLAG_MMXEXT, 1 },
135 #endif
136 #if HAVE_SSE2_INLINE
137  { "XVID-SSE2", ff_idct_xvid_sse2, SSE2_PERM, AV_CPU_FLAG_SSE2, 1 },
138 #if ARCH_X86_64 && HAVE_YASM
139  { "PR-SSE2", ff_prores_idct_put_10_sse2_wrap, TRANSPOSE_PERM, AV_CPU_FLAG_SSE2, 1 },
140 #endif
141 #endif
142 
143 #if ARCH_BFIN
144  { "BFINidct", ff_bfin_idct, NO_PERM },
145 #endif
146 
147 #if ARCH_ARM
148  { "SIMPLE-ARM", ff_simple_idct_arm, NO_PERM },
149  { "INT-ARM", ff_j_rev_dct_arm, MMX_PERM },
150 #endif
151 #if HAVE_ARMV5TE
152  { "SIMPLE-ARMV5TE", ff_simple_idct_armv5te,NO_PERM, AV_CPU_FLAG_ARMV5TE },
153 #endif
154 #if HAVE_ARMV6
155  { "SIMPLE-ARMV6", ff_simple_idct_armv6, MMX_PERM, AV_CPU_FLAG_ARMV6 },
156 #endif
157 #if HAVE_NEON
159 #endif
160 
161 #if ARCH_ALPHA
162  { "SIMPLE-ALPHA", ff_simple_idct_axp, NO_PERM },
163 #endif
164 
165  { 0 }
166 };
167 
/* Right shift applied after multiplying SCALE_PERM output by its scale. */
#define AANSCALE_BITS 12

/* Number of generated blocks per accuracy measurement. */
#define NB_ITS 20000
/* Number of transform calls between two clock reads in the speed test. */
#define NB_ITS_SPEED 50000
172 
/* Destination index per source index for MMX_PERM; filled by idct_mmx_init(). */
static short idct_mmx_perm[64];

/* Destination index per source index for MMX_SIMPLE_PERM. */
static short idct_simple_mmx_perm[64] = {
    /* sources  0.. 7 */ 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    /* sources  8..15 */ 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    /* sources 16..23 */ 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    /* sources 24..31 */ 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    /* sources 32..39 */ 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    /* sources 40..47 */ 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    /* sources 48..55 */ 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    /* sources 56..63 */ 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/* Destination column per source column within a row for SSE2_PERM. */
static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
187 
188 static void idct_mmx_init(void)
189 {
190  int i;
191 
192  /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
193  for (i = 0; i < 64; i++) {
194  idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
195  }
196 }
197 
198 DECLARE_ALIGNED(16, static int16_t, block)[64];
199 DECLARE_ALIGNED(8, static int16_t, block1)[64];
200 
201 static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng, int vals)
202 {
203  int i, j;
204 
205  memset(block, 0, 64 * sizeof(*block));
206 
207  switch (test) {
208  case 0:
209  for (i = 0; i < 64; i++)
210  block[i] = (av_lfg_get(prng) % (2*vals)) -vals;
211  if (is_idct) {
212  ff_ref_fdct(block);
213  for (i = 0; i < 64; i++)
214  block[i] >>= 3;
215  }
216  break;
217  case 1:
218  j = av_lfg_get(prng) % 10 + 1;
219  for (i = 0; i < j; i++) {
220  int idx = av_lfg_get(prng) % 64;
221  block[idx] = av_lfg_get(prng) % (2*vals) -vals;
222  }
223  break;
224  case 2:
225  block[ 0] = av_lfg_get(prng) % (16*vals) - (8*vals);
226  block[63] = (block[0] & 1) ^ 1;
227  break;
228  }
229 }
230 
231 static void permute(int16_t dst[64], const int16_t src[64], int perm)
232 {
233  int i;
234 
235  if (perm == MMX_PERM) {
236  for (i = 0; i < 64; i++)
237  dst[idct_mmx_perm[i]] = src[i];
238  } else if (perm == MMX_SIMPLE_PERM) {
239  for (i = 0; i < 64; i++)
240  dst[idct_simple_mmx_perm[i]] = src[i];
241  } else if (perm == SSE2_PERM) {
242  for (i = 0; i < 64; i++)
243  dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];
244  } else if (perm == PARTTRANS_PERM) {
245  for (i = 0; i < 64; i++)
246  dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
247  } else if (perm == TRANSPOSE_PERM) {
248  for (i = 0; i < 64; i++)
249  dst[(i>>3) | ((i<<3)&0x38)] = src[i];
250  } else {
251  for (i = 0; i < 64; i++)
252  dst[i] = src[i];
253  }
254 }
255 
256 static int dct_error(const struct algo *dct, int test, int is_idct, int speed, const int bits)
257 {
258  void (*ref)(int16_t *block) = is_idct ? ff_ref_idct : ff_ref_fdct;
259  int it, i, scale;
260  int err_inf, v;
261  int64_t err2, ti, ti1, it1, err_sum = 0;
262  int64_t sysErr[64], sysErrMax = 0;
263  int maxout = 0;
264  int blockSumErrMax = 0, blockSumErr;
265  AVLFG prng;
266  const int vals=1<<bits;
267  double omse, ome;
268  int spec_err;
269 
270  av_lfg_init(&prng, 1);
271 
272  err_inf = 0;
273  err2 = 0;
274  for (i = 0; i < 64; i++)
275  sysErr[i] = 0;
276  for (it = 0; it < NB_ITS; it++) {
277  init_block(block1, test, is_idct, &prng, vals);
278  permute(block, block1, dct->format);
279 
280  dct->func(block);
281  emms_c();
282 
283  if (dct->format == SCALE_PERM) {
284  for (i = 0; i < 64; i++) {
285  scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
286  block[i] = (block[i] * scale) >> AANSCALE_BITS;
287  }
288  }
289 
290  ref(block1);
291 
292  blockSumErr = 0;
293  for (i = 0; i < 64; i++) {
294  int err = block[i] - block1[i];
295  err_sum += err;
296  v = abs(err);
297  if (v > err_inf)
298  err_inf = v;
299  err2 += v * v;
300  sysErr[i] += block[i] - block1[i];
301  blockSumErr += v;
302  if (abs(block[i]) > maxout)
303  maxout = abs(block[i]);
304  }
305  if (blockSumErrMax < blockSumErr)
306  blockSumErrMax = blockSumErr;
307  }
308  for (i = 0; i < 64; i++)
309  sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i]));
310 
311  for (i = 0; i < 64; i++) {
312  if (i % 8 == 0)
313  printf("\n");
314  printf("%7d ", (int) sysErr[i]);
315  }
316  printf("\n");
317 
318  omse = (double) err2 / NB_ITS / 64;
319  ome = (double) err_sum / NB_ITS / 64;
320 
321  spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
322 
323  printf("%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
324  is_idct ? "IDCT" : "DCT", dct->name, err_inf,
325  omse, ome, (double) sysErrMax / NB_ITS,
326  maxout, blockSumErrMax);
327 
328  if (spec_err && !dct->nonspec)
329  return 1;
330 
331  if (!speed)
332  return 0;
333 
334  /* speed test */
335 
336  init_block(block, test, is_idct, &prng, vals);
337  permute(block1, block, dct->format);
338 
339  ti = av_gettime();
340  it1 = 0;
341  do {
342  for (it = 0; it < NB_ITS_SPEED; it++) {
343  memcpy(block, block1, sizeof(block));
344  dct->func(block);
345  }
346  emms_c();
347  it1 += NB_ITS_SPEED;
348  ti1 = av_gettime() - ti;
349  } while (ti1 < 1000000);
350 
351  printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name,
352  (double) it1 * 1000.0 / (double) ti1);
353 
354  return 0;
355 }
356 
359 
360 static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
361 {
362  static int init;
363  static double c8[8][8];
364  static double c4[4][4];
365  double block1[64], block2[64], block3[64];
366  double s, sum, v;
367  int i, j, k;
368 
369  if (!init) {
370  init = 1;
371 
372  for (i = 0; i < 8; i++) {
373  sum = 0;
374  for (j = 0; j < 8; j++) {
375  s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
376  c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
377  sum += c8[i][j] * c8[i][j];
378  }
379  }
380 
381  for (i = 0; i < 4; i++) {
382  sum = 0;
383  for (j = 0; j < 4; j++) {
384  s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
385  c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
386  sum += c4[i][j] * c4[i][j];
387  }
388  }
389  }
390 
391  /* butterfly */
392  s = 0.5 * sqrt(2.0);
393  for (i = 0; i < 4; i++) {
394  for (j = 0; j < 8; j++) {
395  block1[8 * (2 * i) + j] =
396  (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
397  block1[8 * (2 * i + 1) + j] =
398  (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
399  }
400  }
401 
402  /* idct8 on lines */
403  for (i = 0; i < 8; i++) {
404  for (j = 0; j < 8; j++) {
405  sum = 0;
406  for (k = 0; k < 8; k++)
407  sum += c8[k][j] * block1[8 * i + k];
408  block2[8 * i + j] = sum;
409  }
410  }
411 
412  /* idct4 */
413  for (i = 0; i < 8; i++) {
414  for (j = 0; j < 4; j++) {
415  /* top */
416  sum = 0;
417  for (k = 0; k < 4; k++)
418  sum += c4[k][j] * block2[8 * (2 * k) + i];
419  block3[8 * (2 * j) + i] = sum;
420 
421  /* bottom */
422  sum = 0;
423  for (k = 0; k < 4; k++)
424  sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
425  block3[8 * (2 * j + 1) + i] = sum;
426  }
427  }
428 
429  /* clamp and store the result */
430  for (i = 0; i < 8; i++) {
431  for (j = 0; j < 8; j++) {
432  v = block3[8 * i + j];
433  if (v < 0) v = 0;
434  else if (v > 255) v = 255;
435  dest[i * linesize + j] = (int) rint(v);
436  }
437  }
438 }
439 
440 static void idct248_error(const char *name,
441  void (*idct248_put)(uint8_t *dest, int line_size,
442  int16_t *block),
443  int speed)
444 {
445  int it, i, it1, ti, ti1, err_max, v;
446  AVLFG prng;
447 
448  av_lfg_init(&prng, 1);
449 
450  /* just one test to see if code is correct (precision is less
451  important here) */
452  err_max = 0;
453  for (it = 0; it < NB_ITS; it++) {
454  /* XXX: use forward transform to generate values */
455  for (i = 0; i < 64; i++)
456  block1[i] = av_lfg_get(&prng) % 256 - 128;
457  block1[0] += 1024;
458 
459  for (i = 0; i < 64; i++)
460  block[i] = block1[i];
461  idct248_ref(img_dest1, 8, block);
462 
463  for (i = 0; i < 64; i++)
464  block[i] = block1[i];
465  idct248_put(img_dest, 8, block);
466 
467  for (i = 0; i < 64; i++) {
468  v = abs((int) img_dest[i] - (int) img_dest1[i]);
469  if (v == 255)
470  printf("%d %d\n", img_dest[i], img_dest1[i]);
471  if (v > err_max)
472  err_max = v;
473  }
474 #if 0
475  printf("ref=\n");
476  for(i=0;i<8;i++) {
477  int j;
478  for(j=0;j<8;j++) {
479  printf(" %3d", img_dest1[i*8+j]);
480  }
481  printf("\n");
482  }
483 
484  printf("out=\n");
485  for(i=0;i<8;i++) {
486  int j;
487  for(j=0;j<8;j++) {
488  printf(" %3d", img_dest[i*8+j]);
489  }
490  printf("\n");
491  }
492 #endif
493  }
494  printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);
495 
496  if (!speed)
497  return;
498 
499  ti = av_gettime();
500  it1 = 0;
501  do {
502  for (it = 0; it < NB_ITS_SPEED; it++) {
503  for (i = 0; i < 64; i++)
504  block[i] = block1[i];
505  idct248_put(img_dest, 8, block);
506  }
507  emms_c();
508  it1 += NB_ITS_SPEED;
509  ti1 = av_gettime() - ti;
510  } while (ti1 < 1000000);
511 
512  printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
513  (double) it1 * 1000.0 / (double) ti1);
514 }
515 
/**
 * Print the command line summary to standard output.
 */
static void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>] [<bits>]\n"
        "test-number 0 -> test with random matrixes\n"
        " 1 -> test with random sparse matrixes\n"
        " 2 -> do 3. test from mpeg4 std\n"
        "bits Number of time domain bits to use, 8 is default\n"
        "-i test IDCT implementations\n"
        "-4 test IDCT248 implementations\n"
        "-t speed test\n";

    printf("%s", usage);
}
527 
528 #if !HAVE_GETOPT
529 #include "compat/getopt.c"
530 #endif
531 
532 int main(int argc, char **argv)
533 {
534  int test_idct = 0, test_248_dct = 0;
535  int c, i;
536  int test = 1;
537  int speed = 0;
538  int err = 0;
539  int bits=8;
540 
542 
543  ff_ref_dct_init();
544  idct_mmx_init();
545 
546  for (;;) {
547  c = getopt(argc, argv, "ih4t");
548  if (c == -1)
549  break;
550  switch (c) {
551  case 'i':
552  test_idct = 1;
553  break;
554  case '4':
555  test_248_dct = 1;
556  break;
557  case 't':
558  speed = 1;
559  break;
560  default:
561  case 'h':
562  help();
563  return 0;
564  }
565  }
566 
567  if (optind < argc)
568  test = atoi(argv[optind]);
569  if(optind+1 < argc) bits= atoi(argv[optind+1]);
570 
571  printf("ffmpeg DCT/IDCT test\n");
572 
573  if (test_248_dct) {
574  idct248_error("SIMPLE-C", ff_simple_idct248_put, speed);
575  } else {
576  const struct algo *algos = test_idct ? idct_tab : fdct_tab;
577  for (i = 0; algos[i].name; i++)
578  if (!(~cpu_flags & algos[i].mm_support)) {
579  err |= dct_error(&algos[i], test, test_idct, speed, bits);
580  }
581  }
582 
583  if (err)
584  printf("Error: %d.\n", err);
585 
586  return !!err;
587 }