Go to the documentation of this file.
29 #include "config_components.h"
96 #if CONFIG_MPEG4_DECODER
114 #define AANSCALE_BITS 12
117 #define NB_ITS_SPEED 50000
130 for (
i = 0;
i < 64;
i++)
134 for (
i = 0;
i < 64;
i++)
140 for (
i = 0;
i < j;
i++) {
164 for (
i = 0;
i < 64;
i++)
165 dst[(
i & 0x38) | ((
i & 6) >> 1) | ((
i & 1) << 2)] =
src[
i];
168 for (
i = 0;
i < 64;
i++)
169 dst[(
i & 0x24) | ((
i & 3) << 3) | ((
i >> 3) & 3)] =
src[
i];
172 for (
i = 0;
i < 64;
i++)
173 dst[(
i>>3) | ((
i<<3)&0x38)] =
src[
i];
176 for (
i = 0;
i < 64;
i++)
187 int64_t err2, ti, ti1, it1, err_sum = 0;
188 int64_t sysErr[64], sysErrMax = 0;
189 int64_t err2_matrix[64], err2_max = 0;
191 int blockSumErrMax = 0, blockSumErr;
193 const int vals=1<<
bits;
201 for (
i = 0;
i < 64;
i++)
202 err2_matrix[
i] = sysErr[
i] = 0;
210 if (!strcmp(
dct->name,
"IJG-AAN-INT")) {
211 for (
i = 0;
i < 64;
i++) {
218 if (!strcmp(
dct->name,
"PR-SSE2"))
219 for (
i = 0;
i < 64;
i++)
223 for (
i = 0;
i < 64;
i++) {
229 err2_matrix[
i] += v * v;
236 if (blockSumErrMax < blockSumErr)
237 blockSumErrMax = blockSumErr;
239 for (
i = 0;
i < 64;
i++) {
241 err2_max =
FFMAX(err2_max ,
FFABS(err2_matrix[
i]));
244 for (
i = 0;
i < 64;
i++) {
247 printf(
"%7d ", (
int) sysErr[
i]);
254 spec_err = is_idct && (err_inf > 1 || omse > 0.02 ||
fabs(ome) > 0.0015);
256 spec_err = is_idct && ((
double) err2_max /
NB_ITS > 0.06 || (
double) sysErrMax /
NB_ITS > 0.015);
258 printf(
"%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
259 is_idct ?
"IDCT" :
"DCT",
dct->name, err_inf,
260 omse, ome, (
double) sysErrMax /
NB_ITS,
261 maxout, blockSumErrMax);
263 if (spec_err && !
dct->nonspec) {
286 }
while (ti1 < 1000000);
288 printf(
"%s %s: %0.1f kdct/s\n", is_idct ?
"IDCT" :
"DCT",
dct->name,
289 (
double) it1 * 1000.0 / (
double) ti1);
300 static double c8[8][8];
301 static double c4[4][4];
302 double block1[64], block2[64], block3[64];
309 for (
i = 0;
i < 8;
i++) {
311 for (j = 0; j < 8; j++) {
312 s = (
i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
313 c8[
i][j] =
s * cos(
M_PI *
i * (j + 0.5) / 8.0);
314 sum += c8[
i][j] * c8[
i][j];
318 for (
i = 0;
i < 4;
i++) {
320 for (j = 0; j < 4; j++) {
321 s = (
i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
322 c4[
i][j] =
s * cos(
M_PI *
i * (j + 0.5) / 4.0);
323 sum += c4[
i][j] * c4[
i][j];
330 for (
i = 0;
i < 4;
i++) {
331 for (j = 0; j < 8; j++) {
340 for (
i = 0;
i < 8;
i++) {
341 for (j = 0; j < 8; j++) {
343 for (k = 0; k < 8; k++)
344 sum += c8[k][j] *
block1[8 *
i + k];
345 block2[8 *
i + j] = sum;
350 for (
i = 0;
i < 8;
i++) {
351 for (j = 0; j < 4; j++) {
354 for (k = 0; k < 4; k++)
355 sum += c4[k][j] * block2[8 * (2 * k) +
i];
356 block3[8 * (2 * j) +
i] = sum;
360 for (k = 0; k < 4; k++)
361 sum += c4[k][j] * block2[8 * (2 * k + 1) +
i];
362 block3[8 * (2 * j + 1) +
i] = sum;
367 for (
i = 0;
i < 8;
i++) {
368 for (j = 0; j < 8; j++) {
369 v = block3[8 *
i + j];
371 else if (v > 255) v = 255;
372 dest[
i * linesize + j] = (
int)
rint(v);
378 void (*idct248_put)(uint8_t *dest,
383 int it,
i, it1, ti, ti1, err_max, v;
393 for (
i = 0;
i < 64;
i++)
397 for (
i = 0;
i < 64;
i++)
401 for (
i = 0;
i < 64;
i++)
405 for (
i = 0;
i < 64;
i++) {
432 printf(
"%s %s: err_inf=%d\n", 1 ?
"IDCT248" :
"DCT248",
name, err_max);
441 for (
i = 0;
i < 64;
i++)
448 }
while (ti1 < 1000000);
450 printf(
"%s %s: %0.1f kdct/s\n", 1 ?
"IDCT248" :
"DCT248",
name,
451 (
double) it1 * 1000.0 / (
double) ti1);
456 printf(
"dct-test [-i] [<test-number>] [<bits>]\n"
457 "test-number 0 -> test with random matrixes\n"
458 " 1 -> test with random sparse matrixes\n"
459 " 2 -> do 3. test from MPEG-4 std\n"
460 "bits Number of time domain bits to use, 8 is default\n"
461 "-i test IDCT implementations\n"
462 "-4 test IDCT248 implementations\n"
470 int main(
int argc,
char **argv)
472 int test_idct = 0, test_248_dct = 0;
482 c =
getopt(argc, argv,
"ih4t");
506 printf(
"ffmpeg DCT/IDCT test\n");
533 printf(
"Error: %d.\n", err);
int64_t av_gettime_relative(void)
Get the current time in microseconds since some unspecified starting point.
It's the only field you need to keep, assuming you have a context. There is some magic you don't need to care about around this; just let it [...] vf default minimum maximum flags: name is the option name.
av_cold void av_lfg_init(AVLFG *c, unsigned int seed)
av_cold void ff_ref_dct_init(void)
Initialize the double precision discrete cosine transform functions fdct & idct.
static void ff_prores_idct_wrap(int16_t *dst)
static uint8_t img_dest[64]
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
static atomic_int cpu_flags
void ff_simple_idct_int16_10bit(int16_t *block)
int main(int argc, char **argv)
static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng, int vals)
static void idct248_ref(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
void ff_simple_idct248_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
static av_always_inline float scale(float x, float s)
#define FF_ARRAY_ELEMS(a)
static int getopt(int argc, char *argv[], char *opts)
#define LOCAL_ALIGNED(a, t, v,...)
static unsigned int av_lfg_get(AVLFG *c)
Get the next random unsigned 32-bit number using an ALFG.
void ff_xvid_idct(int16_t *const in)
void ff_faanidct(int16_t block[64])
int(* init)(AVBSFContext *ctx)
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
static __device__ float fabs(float a)
static const struct algo idct_tab[]
void ff_faandct(int16_t *data)
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c...
Context structure for the Lagged Fibonacci PRNG.
#define DECLARE_ALIGNED(n, t, v)
printf("static const uint8_t my_array[100] = {\n")
void(* func)(int16_t *block)
static const struct algo fdct_tab_arch[]
static int permute_x86(int16_t dst[64], const int16_t src[64], enum idct_permutation_type perm_type)
void ff_simple_idct_int16_8bit(int16_t *block)
#define i(width, name, range_min, range_max)
static void permute(int16_t dst[64], const int16_t src[64], enum idct_permutation_type perm_type)
void ff_jpeg_fdct_islow_8(int16_t *data)
static const struct algo fdct_tab[]
static void dct(AudioRNNContext *s, float *out, const float *in)
void ff_j_rev_dct(int16_t *data)
void ff_fdct_ifast(int16_t *data)
static void idct248_error(const char *name, void(*idct248_put)(uint8_t *dest, ptrdiff_t line_size, int16_t *block), int speed)
void ff_simple_idct_int16_12bit(int16_t *block)
static int ref[MAX_W *MAX_W]
void ff_ref_fdct(short *block)
Transform 8x8 block of data with a double precision forward DCT. This is a reference implementation.
static const struct algo idct_tab_arch[]
void ff_prores_idct_10(int16_t *block, const int16_t *qmat)
Special version of ff_simple_idct_int16_10bit() which does dequantization and scales by a factor of 2...
s/EdgeDetect/Foobar/g libavfilter/vf_edgedetect.c > libavfilter/vf_foobar.c; edit libavfilter/Makefile and add an entry for foobar following the pattern of the other filters; edit libavfilter/allfilters.c and add an entry for foobar following the pattern of the other filters; ./configure; make -j<whatever> ffmpeg; ./ffmpeg -i ...; you should get a foobar.png with Lena edge-detected. That's it.
void ff_ref_idct(short *block)
Transform 8x8 block of data with a double precision inverse DCT. This is a reference implementation.
enum idct_permutation_type perm_type
static uint8_t img_dest1[64]
static int dct_error(const struct algo *dct, int test, int is_idct, int speed, const int bits)
static int16_t block1[64]
const uint16_t ff_aanscales[64]