FFmpeg
dct.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
#include "config.h"

#include <stddef.h>
#include <stdint.h>

#include "libavutil/cpu.h"
#include "libavutil/mem_internal.h"

#include "libavcodec/x86/fdct.h"
#include "libavcodec/x86/xvididct.h"
#include "libavcodec/x86/simple_idct.h"
27 
#if (CONFIG_PRORES_DECODER || CONFIG_PRORES_LGPL_DECODER) && ARCH_X86_64 && HAVE_X86ASM
/* Prototype kept in step with the function's documented signature:
 * pointer-sized line size and a read-only quantisation matrix. */
void ff_prores_idct_put_10_sse2(uint16_t *dst, ptrdiff_t linesize,
                                int16_t *block, const int16_t *qmat);

/*
 * PR_WRAP(INSN) defines ff_prores_idct_put_10_<INSN>_wrap(), a
 * single-argument adapter around ff_prores_idct_put_10_<INSN>() that
 * idct_tab_arch[] can reference.
 *
 * The generated function:
 *   1. fills a 64-entry quantisation matrix with the constant 4 and copies
 *      the 64 input values into an aligned scratch block;
 *   2. calls the ProRes IDCT with the caller's buffer as destination and a
 *      line size of 16
 *      (NOTE(review): presumably bytes, i.e. 8 16-bit samples per row --
 *      confirm against the asm);
 *   3. subtracts 512 from each of the 64 values in the caller's buffer
 *      (NOTE(review): presumably removes the bias added by the 10-bit
 *      "put" routine -- confirm).
 *
 * The destination pointer is cast explicitly: the wrapper receives
 * int16_t * while the IDCT is declared with uint16_t *dst, and C does not
 * convert between those pointer types implicitly.
 */
#define PR_WRAP(INSN) \
static void ff_prores_idct_put_10_##INSN##_wrap(int16_t *dst) \
{ \
    LOCAL_ALIGNED(16, int16_t, qmat, [64]); \
    LOCAL_ALIGNED(16, int16_t, tmp,  [64]); \
    int i; \
 \
    for (i = 0; i < 64; i++) { \
        qmat[i] = 4; \
        tmp[i]  = dst[i]; \
    } \
    ff_prores_idct_put_10_##INSN((uint16_t *)dst, 16, tmp, qmat); \
 \
    for (i = 0; i < 64; i++) \
        dst[i] -= 512; \
}

PR_WRAP(sse2)

# if HAVE_AVX_EXTERNAL
void ff_prores_idct_put_10_avx(uint16_t *dst, ptrdiff_t linesize,
                               int16_t *block, const int16_t *qmat);
PR_WRAP(avx)
# endif

#endif
58 
59 static const struct algo fdct_tab_arch[] = {
60 #if HAVE_MMX_INLINE
62 #endif
63 #if HAVE_MMXEXT_INLINE
65 #endif
66 #if HAVE_SSE2_INLINE
68 #endif
69  { 0 }
70 };
71 
72 static const struct algo idct_tab_arch[] = {
73 #if HAVE_MMX_EXTERNAL
75 #endif
76 #if CONFIG_MPEG4_DECODER && HAVE_X86ASM
77 #if ARCH_X86_32
80 #endif
81 #if HAVE_SSE2_EXTERNAL
83 #endif
84 #endif /* CONFIG_MPEG4_DECODER && HAVE_X86ASM */
85 #if (CONFIG_PRORES_DECODER || CONFIG_PRORES_LGPL_DECODER) && ARCH_X86_64 && HAVE_X86ASM
86  { "PR-SSE2", ff_prores_idct_put_10_sse2_wrap, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_SSE2, 1 },
87 # if HAVE_AVX_EXTERNAL
88  { "PR-AVX", ff_prores_idct_put_10_avx_wrap, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_AVX, 1 },
89 # endif
90 #endif
91 #if HAVE_X86ASM
92 #if ARCH_X86_64
93 #if HAVE_SSE2_EXTERNAL
97 #endif
98 #if HAVE_AVX_EXTERNAL
102 #endif
103 #endif
104 #endif
105  { 0 }
106 };
107 
/*
 * Destination index for every source coefficient under FF_IDCT_PERM_SIMPLE:
 * permute_x86() stores src[i] at dst[idct_simple_mmx_perm[i]].
 * The 64 entries are a permutation of 0x00..0x3F.
 */
static const uint8_t idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
118 
/*
 * Remapping of the low three index bits under FF_IDCT_PERM_SSE2:
 * permute_x86() stores src[i] at
 * dst[(i & 0x38) | idct_sse2_row_perm[i & 7]].
 */
static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
120 
121 static int permute_x86(int16_t dst[64], const int16_t src[64],
123 {
124  int i;
125 
126  switch (perm_type) {
127  case FF_IDCT_PERM_SIMPLE:
128  for (i = 0; i < 64; i++)
129  dst[idct_simple_mmx_perm[i]] = src[i];
130  return 1;
131  case FF_IDCT_PERM_SSE2:
132  for (i = 0; i < 64; i++)
133  dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];
134  return 1;
135  }
136 
137  return 0;
138 }
ff_simple_idct12_avx
void ff_simple_idct12_avx(int16_t *block)
mem_internal.h
ff_fdct_mmxext
void ff_fdct_mmxext(int16_t *block)
fdct.h
idct_sse2_row_perm
static const uint8_t idct_sse2_row_perm[8]
Definition: dct.c:119
ff_simple_idct10_avx
void ff_simple_idct10_avx(int16_t *block)
xvididct.h
ff_xvid_idct_mmxext
void ff_xvid_idct_mmxext(short *block)
ff_fdct_sse2
void ff_fdct_sse2(int16_t *block)
ff_simple_idct8_avx
void ff_simple_idct8_avx(int16_t *block)
src
#define src
Definition: vp8dsp.c:255
idct_tab_arch
static const struct algo idct_tab_arch[]
Definition: dct.c:72
AV_CPU_FLAG_SSE2
#define AV_CPU_FLAG_SSE2
PIV SSE2 functions.
Definition: cpu.h:34
idct_simple_mmx_perm
static const uint8_t idct_simple_mmx_perm[64]
Definition: dct.c:108
AV_CPU_FLAG_AVX
#define AV_CPU_FLAG_AVX
AVX functions: requires OS support even if YMM registers aren't used.
Definition: cpu.h:47
ff_prores_idct_put_10_avx
void ff_prores_idct_put_10_avx(uint16_t *dst, ptrdiff_t linesize, int16_t *block, const int16_t *qmat)
cpu.h
FF_IDCT_PERM_NONE
@ FF_IDCT_PERM_NONE
Definition: idctdsp.h:38
ff_fdct_mmx
void ff_fdct_mmx(int16_t *block)
ff_xvid_idct_sse2
void ff_xvid_idct_sse2(short *block)
ff_simple_idct12_sse2
void ff_simple_idct12_sse2(int16_t *block)
permute_x86
static int permute_x86(int16_t dst[64], const int16_t src[64], enum idct_permutation_type perm_type)
Definition: dct.c:121
ff_prores_idct_put_10_sse2
void ff_prores_idct_put_10_sse2(uint16_t *dst, ptrdiff_t linesize, int16_t *block, const int16_t *qmat)
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:271
algo
Definition: dct.c:53
ff_xvid_idct_mmx
void ff_xvid_idct_mmx(short *block)
FF_IDCT_PERM_TRANSPOSE
@ FF_IDCT_PERM_TRANSPOSE
Definition: idctdsp.h:41
simple_idct.h
FF_IDCT_PERM_SSE2
@ FF_IDCT_PERM_SSE2
Definition: idctdsp.h:43
AV_CPU_FLAG_MMX
#define AV_CPU_FLAG_MMX
standard MMX
Definition: cpu.h:29
ff_simple_idct_mmx
void ff_simple_idct_mmx(int16_t *block)
AV_CPU_FLAG_MMXEXT
#define AV_CPU_FLAG_MMXEXT
SSE integer functions or AMD MMX ext.
Definition: cpu.h:30
idct_permutation_type
idct_permutation_type
Definition: idctdsp.h:37
ff_simple_idct10_sse2
void ff_simple_idct10_sse2(int16_t *block)
algo::perm_type
enum idct_permutation_type perm_type
Definition: dct.c:56
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
ff_simple_idct8_sse2
void ff_simple_idct8_sse2(int16_t *block)
FF_IDCT_PERM_SIMPLE
@ FF_IDCT_PERM_SIMPLE
Definition: idctdsp.h:40
fdct_tab_arch
static const struct algo fdct_tab_arch[]
Definition: dct.c:59