FFmpeg
Main Page
Related Pages
Modules
Namespaces
Data Structures
Files
Examples
File List
Globals
All
Data Structures
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Macros
Groups
Pages
libavcodec
x86
mpegvideoencdsp_init.c
Go to the documentation of this file.
1
/*
2
* This file is part of FFmpeg.
3
*
4
* FFmpeg is free software; you can redistribute it and/or
5
* modify it under the terms of the GNU Lesser General Public
6
* License as published by the Free Software Foundation; either
7
* version 2.1 of the License, or (at your option) any later version.
8
*
9
* FFmpeg is distributed in the hope that it will be useful,
10
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12
* Lesser General Public License for more details.
13
*
14
* You should have received a copy of the GNU Lesser General Public
15
* License along with FFmpeg; if not, write to the Free Software
16
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
*/
18
19
#include "
libavutil/attributes.h
"
20
#include "
libavutil/avassert.h
"
21
#include "
libavutil/cpu.h
"
22
#include "
libavutil/x86/cpu.h
"
23
#include "
libavcodec/avcodec.h
"
24
#include "
libavcodec/mpegvideoencdsp.h
"
25
26
/* Prototypes for routines implemented in external x86 assembly.
 * Judging by the names, pix_sum16 computes the sum of pixel values and
 * pix_norm1 a sum of squared pixel values over a 16-pixel-wide block at
 * 'pix' with row stride 'line_size' — NOTE(review): exact block height
 * is defined by the .asm source, not visible here; confirm there. */
int ff_pix_sum16_mmx(uint8_t *pix, int line_size);
int ff_pix_sum16_mmxext(uint8_t *pix, int line_size);
int ff_pix_sum16_sse2(uint8_t *pix, int line_size);
int ff_pix_sum16_xop(uint8_t *pix, int line_size);
int ff_pix_norm1_mmx(uint8_t *pix, int line_size);
int ff_pix_norm1_sse2(uint8_t *pix, int line_size);
32
33
#if HAVE_INLINE_ASM
34
35
/* PHADDD(a, t): horizontal add — fold the high 32-bit half of MMX
 * register a into its low half; t is a scratch register. */
#define PHADDD(a, t) \
    "movq " #a ", " #t " \n\t" \
    "psrlq $32, " #a " \n\t" \
    "paddd " #t ", " #a " \n\t"

/*
 * pmulhw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15])[16 - 31]
 * pmulhrw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x8000)[16 - 31]
 * pmulhrsw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x4000)[15 - 30]
 */
/* MMX flavour: plain pmulhw, so rounding is emulated with an explicit
 * add of the rounding constant (o, set up via SET_RND) and a shift. */
#define PMULHRW(x, y, s, o) \
    "pmulhw " #s ", " #x " \n\t" \
    "pmulhw " #s ", " #y " \n\t" \
    "paddw " #o ", " #x " \n\t" \
    "paddw " #o ", " #y " \n\t" \
    "psraw $1, " #x " \n\t" \
    "psraw $1, " #y " \n\t"
/* Instantiate the QNS template for MMX: DEF() suffixes the generated
 * function names, producing try_8x8basis_mmx / add_8x8basis_mmx. */
#define DEF(x) x ## _mmx
#define SET_RND MOVQ_WONE
#define SCALE_OFFSET 1

#include "mpegvideoenc_qns_template.c"

#undef DEF
#undef SET_RND
#undef SCALE_OFFSET
#undef PMULHRW

/* 3DNow! flavour: pmulhrw rounds in hardware, so no rounding constant
 * (SET_RND expands to nothing) and no extra add/shift are needed. */
#define DEF(x) x ## _3dnow
#define SET_RND(x)
#define SCALE_OFFSET 0
#define PMULHRW(x, y, s, o) \
    "pmulhrw " #s ", " #x " \n\t" \
    "pmulhrw " #s ", " #y " \n\t"

#include "mpegvideoenc_qns_template.c"

#undef DEF
#undef SET_RND
#undef SCALE_OFFSET
#undef PMULHRW

#if HAVE_SSSE3_INLINE
/* SSSE3 flavour: pmulhrsw rounds in hardware; SCALE_OFFSET of -1
 * compensates for pmulhrsw's different result scaling (bits 15-30
 * instead of 16-31 — see the comment table above). */
#undef PHADDD
#define DEF(x) x ## _ssse3
#define SET_RND(x)
#define SCALE_OFFSET -1

#define PHADDD(a, t) \
    "pshufw $0x0E, " #a ", " #t " \n\t" \
    /* faster than phaddd on core2 */ \
    "paddd " #t ", " #a " \n\t"

#define PMULHRW(x, y, s, o) \
    "pmulhrsw " #s ", " #x " \n\t" \
    "pmulhrsw " #s ", " #y " \n\t"

#include "mpegvideoenc_qns_template.c"

#undef DEF
#undef SET_RND
#undef SCALE_OFFSET
#undef PMULHRW
#undef PHADDD
#endif /* HAVE_SSSE3_INLINE */
100
101
/* Replicate the border pixels of an image outward into a margin of
 * width 'w' (left/right) and height 'h' (top/bottom, when the
 * corresponding bit in 'sides' is set).
 *
 * NOTE(review): the original comment claimed only w == 8 || w == 16 are
 * handled, but the final branch asserts and handles w == 4 as well.
 *
 * buf:    top-left pixel of the image proper (margins lie outside it)
 * wrap:   row stride in bytes
 * width:  image width in bytes processed per row
 * height: number of image rows
 * w:      left/right edge width; must be 4, 8 or 16
 * h:      top/bottom edge height; loops step by 4 rows per iteration
 * sides:  bitmask of EDGE_TOP / EDGE_BOTTOM
 *
 * NOTE(review): no 'emms' is issued here after MMX use — presumably the
 * caller (or a wrapper) handles FPU state; confirm before reuse. */
static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
                           int w, int h, int sides)
{
    uint8_t *ptr, *last_line;
    int i;

    last_line = buf + (height - 1) * wrap;
    /* left and right */
    ptr = buf;
    if (w == 8) {
        /* Per row: splat the first byte into the 8 bytes left of the
         * row, and the last byte into the 8 bytes right of it. */
        __asm__ volatile (
            "1: \n\t"
            "movd (%0), %%mm0 \n\t"
            "punpcklbw %%mm0, %%mm0 \n\t"
            "punpcklwd %%mm0, %%mm0 \n\t"
            "punpckldq %%mm0, %%mm0 \n\t"
            "movq %%mm0, -8(%0) \n\t"
            "movq -8(%0, %2), %%mm1 \n\t"
            "punpckhbw %%mm1, %%mm1 \n\t"
            "punpckhwd %%mm1, %%mm1 \n\t"
            "punpckhdq %%mm1, %%mm1 \n\t"
            "movq %%mm1, (%0, %2) \n\t"
            "add %1, %0 \n\t"
            "cmp %3, %0 \n\t"
            "jb 1b \n\t"
            : "+r" (ptr)
            : "r" ((x86_reg) wrap), "r" ((x86_reg) width),
              "r" (ptr + wrap * height));
    } else if (w == 16) {
        /* Same as the w == 8 case, but each splatted qword is stored
         * twice to cover a 16-byte margin on each side. */
        __asm__ volatile (
            "1: \n\t"
            "movd (%0), %%mm0 \n\t"
            "punpcklbw %%mm0, %%mm0 \n\t"
            "punpcklwd %%mm0, %%mm0 \n\t"
            "punpckldq %%mm0, %%mm0 \n\t"
            "movq %%mm0, -8(%0) \n\t"
            "movq %%mm0, -16(%0) \n\t"
            "movq -8(%0, %2), %%mm1 \n\t"
            "punpckhbw %%mm1, %%mm1 \n\t"
            "punpckhwd %%mm1, %%mm1 \n\t"
            "punpckhdq %%mm1, %%mm1 \n\t"
            "movq %%mm1, (%0, %2) \n\t"
            "movq %%mm1, 8(%0, %2) \n\t"
            "add %1, %0 \n\t"
            "cmp %3, %0 \n\t"
            "jb 1b \n\t"
            : "+r" (ptr)
            : "r" ((x86_reg) wrap), "r" ((x86_reg) width),
              "r" (ptr + wrap * height)
            );
    } else {
        av_assert1(w == 4);
        /* 4-byte margins: dword stores instead of qword stores. */
        __asm__ volatile (
            "1: \n\t"
            "movd (%0), %%mm0 \n\t"
            "punpcklbw %%mm0, %%mm0 \n\t"
            "punpcklwd %%mm0, %%mm0 \n\t"
            "movd %%mm0, -4(%0) \n\t"
            "movd -4(%0, %2), %%mm1 \n\t"
            "punpcklbw %%mm1, %%mm1 \n\t"
            "punpckhwd %%mm1, %%mm1 \n\t"
            "punpckhdq %%mm1, %%mm1 \n\t"
            "movd %%mm1, (%0, %2) \n\t"
            "add %1, %0 \n\t"
            "cmp %3, %0 \n\t"
            "jb 1b \n\t"
            : "+r" (ptr)
            : "r" ((x86_reg) wrap), "r" ((x86_reg) width),
              "r" (ptr + wrap * height));
    }

    /* top and bottom (and hopefully also the corners) */
    if (sides & EDGE_TOP) {
        /* Copy the first image row (already widened by the left/right
         * pass, hence the "- w" / "+ 2 * w" bounds) upward into the
         * margin, 4 destination rows per iteration (%2 = -wrap). */
        for (i = 0; i < h; i += 4) {
            ptr = buf - (i + 1) * wrap - w;
            __asm__ volatile (
                "1: \n\t"
                "movq (%1, %0), %%mm0 \n\t"
                "movq %%mm0, (%0) \n\t"
                "movq %%mm0, (%0, %2) \n\t"
                "movq %%mm0, (%0, %2, 2) \n\t"
                "movq %%mm0, (%0, %3) \n\t"
                "add $8, %0 \n\t"
                "cmp %4, %0 \n\t"
                "jb 1b \n\t"
                : "+r" (ptr)
                : "r" ((x86_reg) buf - (x86_reg) ptr - w),
                  "r" ((x86_reg) - wrap), "r" ((x86_reg) - wrap * 3),
                  "r" (ptr + width + 2 * w));
        }
    }

    if (sides & EDGE_BOTTOM) {
        /* Mirror image of the EDGE_TOP loop: copy the last image row
         * downward into the margin (%2 = +wrap). */
        for (i = 0; i < h; i += 4) {
            ptr = last_line + (i + 1) * wrap - w;
            __asm__ volatile (
                "1: \n\t"
                "movq (%1, %0), %%mm0 \n\t"
                "movq %%mm0, (%0) \n\t"
                "movq %%mm0, (%0, %2) \n\t"
                "movq %%mm0, (%0, %2, 2) \n\t"
                "movq %%mm0, (%0, %3) \n\t"
                "add $8, %0 \n\t"
                "cmp %4, %0 \n\t"
                "jb 1b \n\t"
                : "+r" (ptr)
                : "r" ((x86_reg) last_line - (x86_reg) ptr - w),
                  "r" ((x86_reg) wrap), "r" ((x86_reg) wrap * 3),
                  "r" (ptr + width + 2 * w));
        }
    }
}
214
215
#endif
/* HAVE_INLINE_ASM */
216
217
av_cold
void
ff_mpegvideoencdsp_init_x86
(
MpegvideoEncDSPContext
*
c
,
218
AVCodecContext
*avctx)
219
{
220
int
cpu_flags =
av_get_cpu_flags
();
221
222
#if ARCH_X86_32
223
if
(
EXTERNAL_MMX
(cpu_flags)) {
224
c->
pix_sum
=
ff_pix_sum16_mmx
;
225
c->
pix_norm1
=
ff_pix_norm1_mmx
;
226
}
227
228
if
(
EXTERNAL_MMXEXT
(cpu_flags)) {
229
c->
pix_sum
=
ff_pix_sum16_mmxext
;
230
}
231
#endif
232
233
if
(
EXTERNAL_SSE2
(cpu_flags)) {
234
c->
pix_sum
=
ff_pix_sum16_sse2
;
235
c->
pix_norm1
=
ff_pix_norm1_sse2
;
236
}
237
238
if
(
EXTERNAL_XOP
(cpu_flags)) {
239
c->
pix_sum
=
ff_pix_sum16_xop
;
240
}
241
242
#if HAVE_INLINE_ASM
243
244
if
(
INLINE_MMX
(cpu_flags)) {
245
if
(!(avctx->
flags
&
CODEC_FLAG_BITEXACT
)) {
246
c->
try_8x8basis
= try_8x8basis_mmx;
247
}
248
c->
add_8x8basis
= add_8x8basis_mmx;
249
250
if
(avctx->
bits_per_raw_sample
<= 8) {
251
c->
draw_edges
= draw_edges_mmx;
252
}
253
}
254
255
if
(
INLINE_AMD3DNOW
(cpu_flags)) {
256
if
(!(avctx->
flags
&
CODEC_FLAG_BITEXACT
)) {
257
c->
try_8x8basis
= try_8x8basis_3dnow;
258
}
259
c->
add_8x8basis
= add_8x8basis_3dnow;
260
}
261
262
#if HAVE_SSSE3_INLINE
263
if
(
INLINE_SSSE3
(cpu_flags)) {
264
if
(!(avctx->
flags
&
CODEC_FLAG_BITEXACT
)) {
265
c->
try_8x8basis
= try_8x8basis_ssse3;
266
}
267
c->
add_8x8basis
= add_8x8basis_ssse3;
268
}
269
#endif
/* HAVE_SSSE3_INLINE */
270
271
#endif
/* HAVE_INLINE_ASM */
272
}
Generated on Fri Dec 5 2014 04:42:06 for FFmpeg by
1.8.2