92 #if CONFIG_MPEG4_DECODER 
  108 #define AANSCALE_BITS 12 
  111 #define NB_ITS_SPEED 50000 
  120     memset(block, 0, 64 * 
sizeof(*block));
 
  124         for (i = 0; i < 64; i++)
 
  125             block[i] = (
av_lfg_get(prng) % (2*vals)) -vals;
 
  128             for (i = 0; i < 64; i++)
 
  134         for (i = 0; i < j; i++) {
 
  136             block[idx] = 
av_lfg_get(prng) % (2*vals) -vals;
 
  140         block[ 0] = 
av_lfg_get(prng) % (16*vals) - (8*vals);
 
  141         block[63] = (block[0] & 1) ^ 1;
 
  158         for (i = 0; i < 64; i++)
 
  159             dst[(i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2)] = src[i];
 
  162         for (i = 0; i < 64; i++)
 
  163             dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
 
  166         for (i = 0; i < 64; i++)
 
  167             dst[(i>>3) | ((i<<3)&0x38)] = src[i];
 
  170         for (i = 0; i < 64; i++)
 
  181     int64_t err2, ti, ti1, it1, err_sum = 0;
 
  182     int64_t sysErr[64], sysErrMax = 0;
 
  184     int blockSumErrMax = 0, blockSumErr;
 
  186     const int vals=1<<
bits;
 
  194     for (i = 0; i < 64; i++)
 
  196     for (it = 0; it < 
NB_ITS; it++) {
 
  203         if (!strcmp(dct->
name, 
"IJG-AAN-INT")) {
 
  204             for (i = 0; i < 64; i++) {
 
  211         if (!strcmp(dct->
name, 
"PR-SSE2"))
 
  212             for (i = 0; i < 64; i++)
 
  216         for (i = 0; i < 64; i++) {
 
  223             sysErr[i] += 
block[i] - block1[i];
 
  225             if (abs(
block[i]) > maxout)
 
  226                 maxout = abs(
block[i]);
 
  228         if (blockSumErrMax < blockSumErr)
 
  229             blockSumErrMax = blockSumErr;
 
  231     for (i = 0; i < 64; i++)
 
  232         sysErrMax = 
FFMAX(sysErrMax, 
FFABS(sysErr[i]));
 
  234     for (i = 0; i < 64; i++) {
 
  237         printf(
"%7d ", (
int) sysErr[i]);
 
  241     omse = (double) err2 / NB_ITS / 64;
 
  242     ome  = (double) err_sum / NB_ITS / 64;
 
  244     spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
 
  246     printf(
"%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
 
  247            is_idct ? 
"IDCT" : 
"DCT", dct->
name, err_inf,
 
  248            omse, ome, (
double) sysErrMax / NB_ITS,
 
  249            maxout, blockSumErrMax);
 
  251     if (spec_err && !dct->
nonspec) {
 
  274     } 
while (ti1 < 1000000);
 
  276     printf(
"%s %s: %0.1f kdct/s\n", is_idct ? 
"IDCT" : 
"DCT", dct->
name,
 
  277            (
double) it1 * 1000.0 / (
double) ti1);
 
  288     static double c8[8][8];
 
  289     static double c4[4][4];
 
  290     double block1[64], block2[64], block3[64];
 
  297         for (i = 0; i < 8; i++) {
 
  299             for (j = 0; j < 8; j++) {
 
  300                 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
 
  301                 c8[i][j] = s * cos(
M_PI * i * (j + 0.5) / 8.0);
 
  302                 sum += c8[i][j] * c8[i][j];
 
  306         for (i = 0; i < 4; i++) {
 
  308             for (j = 0; j < 4; j++) {
 
  309                 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
 
  310                 c4[i][j] = s * cos(
M_PI * i * (j + 0.5) / 4.0);
 
  311                 sum += c4[i][j] * c4[i][j];
 
  318     for (i = 0; i < 4; i++) {
 
  319         for (j = 0; j < 8; j++) {
 
  320             block1[8 * (2 * i) + j] =
 
  321                 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * 
s;
 
  322             block1[8 * (2 * i + 1) + j] =
 
  323                 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * 
s;
 
  328     for (i = 0; i < 8; i++) {
 
  329         for (j = 0; j < 8; j++) {
 
  331             for (k = 0; k < 8; k++)
 
  332                 sum += c8[k][j] * block1[8 * i + k];
 
  333             block2[8 * i + j] = sum;
 
  338     for (i = 0; i < 8; i++) {
 
  339         for (j = 0; j < 4; j++) {
 
  342             for (k = 0; k < 4; k++)
 
  343                 sum += c4[k][j] * block2[8 * (2 * k) + i];
 
  344             block3[8 * (2 * j) + i] = sum;
 
  348             for (k = 0; k < 4; k++)
 
  349                 sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
 
  350             block3[8 * (2 * j + 1) + i] = sum;
 
  355     for (i = 0; i < 8; i++) {
 
  356         for (j = 0; j < 8; j++) {
 
  357             v = block3[8 * i + j];
 
  359             else if (v > 255) v = 255;
 
  360             dest[i * linesize + j] = (int) 
rint(v);
 
  366                           void (*idct248_put)(
uint8_t *dest, 
int line_size,
 
  370     int it, i, it1, ti, ti1, err_max, v;
 
  378     for (it = 0; it < 
NB_ITS; it++) {
 
  380         for (i = 0; i < 64; i++)
 
  384         for (i = 0; i < 64; i++)
 
  388         for (i = 0; i < 64; i++)
 
  392         for (i = 0; i < 64; i++) {
 
  419     printf(
"%s %s: err_inf=%d\n", 1 ? 
"IDCT248" : 
"DCT248", name, err_max);
 
  428             for (i = 0; i < 64; i++)
 
  435     } 
while (ti1 < 1000000);
 
  437     printf(
"%s %s: %0.1f kdct/s\n", 1 ? 
"IDCT248" : 
"DCT248", name,
 
  438            (
double) it1 * 1000.0 / (
double) ti1);
 
  443     printf(
"dct-test [-i] [<test-number>] [<bits>]\n" 
  444            "test-number 0 -> test with random matrixes\n" 
  445            "            1 -> test with random sparse matrixes\n" 
  446            "            2 -> do 3. test from MPEG-4 std\n" 
  447            "bits        Number of time domain bits to use, 8 is default\n" 
  448            "-i          test IDCT implementations\n" 
  449            "-4          test IDCT248 implementations\n" 
  457 int main(
int argc, 
char **argv)
 
  459     int test_idct = 0, test_248_dct = 0;
 
  469         c = 
getopt(argc, argv, 
"ih4t");
 
  490         test = atoi(argv[
optind]);
 
  491     if(optind+1 < argc) bits= atoi(argv[optind+1]);
 
  493     printf(
"ffmpeg DCT/IDCT test\n");
 
  501                 err |= 
dct_error(&idct_tab[i], test, test_idct, speed, bits);
 
  503             for (i = 0; idct_tab_arch[i].
name; i++)
 
  504                 if (!(~cpu_flags & idct_tab_arch[i].
cpu_flag))
 
  505                     err |= 
dct_error(&idct_tab_arch[i], test, test_idct, speed, bits);
 
  510                 err |= 
dct_error(&fdct_tab[i], test, test_idct, speed, bits);
 
  512             for (i = 0; fdct_tab_arch[i].
name; i++)
 
  513                 if (!(~cpu_flags & fdct_tab_arch[i].
cpu_flag))
 
  514                     err |= 
dct_error(&fdct_tab_arch[i], test, test_idct, speed, bits);
 
  520         printf(
"Error: %d.\n", err);
 
static const struct algo idct_tab_arch[]
void ff_fdct_ifast(int16_t *data)
static av_cold int init(AVCodecContext *avctx)
static void ff_prores_idct_wrap(int16_t *dst)
av_cold void ff_ref_dct_init(void)
Initialize the double precision discrete cosine transform functions fdct & idct. 
void ff_faanidct(int16_t block[64])
static int permute_x86(int16_t dst[64], const int16_t src[64], enum idct_permutation_type perm_type)
void ff_simple_idct248_put(uint8_t *dest, int line_size, int16_t *block)
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory. 
const uint16_t ff_aanscales[64]
static void idct248_error(const char *name, void(*idct248_put)(uint8_t *dest, int line_size, int16_t *block), int speed)
void ff_prores_idct(int16_t *block, const int16_t *qmat)
Special version of ff_simple_idct_10() which does dequantization and scales by a factor of 2 more bet...
static uint8_t img_dest1[64]
common internal API header 
int main(int argc, char **argv)
static void permute(int16_t dst[64], const int16_t src[64], enum idct_permutation_type perm_type)
typedef void(APIENTRY *FF_PFNGLACTIVETEXTUREPROC)(GLenum texture)
static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng, int vals)
static const struct algo idct_tab[]
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
static int dct_error(const struct algo *dct, int test, int is_idct, int speed, const int bits)
static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
void ff_simple_idct_10(int16_t *block)
#define FF_ARRAY_ELEMS(a)
void ff_jpeg_fdct_islow_8(int16_t *data)
void ff_j_rev_dct(int16_t *data)
void ff_faandct(int16_t *data)
static void test(const char *pattern, const char *host)
void ff_simple_idct_12(int16_t *block)
static int getopt(int argc, char *argv[], char *opts)
static unsigned int av_lfg_get(AVLFG *c)
Get the next random unsigned 32-bit number using an ALFG. 
static const struct algo fdct_tab_arch[]
void ff_ref_fdct(short *block)
Transform 8x8 block of data with a double precision forward DCT   This is a reference implementation...
av_cold void av_lfg_init(AVLFG *c, unsigned int seed)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU. 
void ff_xvid_idct(int16_t *const in)
int64_t av_gettime_relative(void)
Get the current time in microseconds since some unspecified starting point. 
static uint8_t img_dest[64]
AAN (Arai, Agui and Nakajima) (I)DCT tables. 
common internal and external API header 
static int ref[MAX_W *MAX_W]
#define LOCAL_ALIGNED(a, t, v,...)
enum idct_permutation_type perm_type
void ff_ref_idct(short *block)
Transform 8x8 block of data with a double precision inverse DCT   This is a reference implementation...
static int16_t block1[64]
void(* func)(int16_t *block)
static const struct algo fdct_tab[]
void ff_simple_idct_8(int16_t *block)