FFmpeg
Main Page
Related Pages
Modules
Namespaces
Data Structures
Files
Examples
File List
Globals
All
Data Structures
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Macros
Groups
Pages
libavcodec
x86
mpegvideoencdsp_init.c
Go to the documentation of this file.
1
/*
2
* This file is part of FFmpeg.
3
*
4
* FFmpeg is free software; you can redistribute it and/or
5
* modify it under the terms of the GNU Lesser General Public
6
* License as published by the Free Software Foundation; either
7
* version 2.1 of the License, or (at your option) any later version.
8
*
9
* FFmpeg is distributed in the hope that it will be useful,
10
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12
* Lesser General Public License for more details.
13
*
14
* You should have received a copy of the GNU Lesser General Public
15
* License along with FFmpeg; if not, write to the Free Software
16
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
*/
18
19
#include "
libavutil/attributes.h
"
20
#include "
libavutil/avassert.h
"
21
#include "
libavutil/cpu.h
"
22
#include "
libavutil/x86/cpu.h
"
23
#include "
libavcodec/avcodec.h
"
24
#include "
libavcodec/mpegvideoencdsp.h
"
25
26
/*
 * Pixel-statistics kernels that are only declared in this file; their
 * definitions live elsewhere.  Each one takes a pointer to the pixel data
 * and the line size and returns an int.  They are grouped here by
 * instruction set.
 */

/* MMX */
int ff_pix_sum16_mmx(uint8_t *pix, int line_size);
int ff_pix_norm1_mmx(uint8_t *pix, int line_size);

/* SSE2 */
int ff_pix_sum16_sse2(uint8_t *pix, int line_size);
int ff_pix_norm1_sse2(uint8_t *pix, int line_size);

/* XOP (only a pix_sum variant is declared) */
int ff_pix_sum16_xop(uint8_t *pix, int line_size);
31
32
#if HAVE_INLINE_ASM
33
34
#define PHADDD(a, t) \
35
"movq " #a ", " #t " \n\t" \
36
"psrlq $32, " #a " \n\t" \
37
"paddd " #t ", " #a " \n\t"
38
39
/*
40
* pmulhw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15])[16 - 31]
41
* pmulhrw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x8000)[16 - 31]
42
* pmulhrsw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x4000)[15 - 30]
43
*/
44
#define PMULHRW(x, y, s, o) \
45
"pmulhw " #s ", " #x " \n\t" \
46
"pmulhw " #s ", " #y " \n\t" \
47
"paddw " #o ", " #x " \n\t" \
48
"paddw " #o ", " #y " \n\t" \
49
"psraw $1, " #x " \n\t" \
50
"psraw $1, " #y " \n\t"
51
#define DEF(x) x ## _mmx
52
#define SET_RND MOVQ_WONE
53
#define SCALE_OFFSET 1
54
55
#include "
mpegvideoenc_qns_template.c
"
56
57
#undef DEF
58
#undef SET_RND
59
#undef SCALE_OFFSET
60
#undef PMULHRW
61
62
#define DEF(x) x ## _3dnow
63
#define SET_RND(x)
64
#define SCALE_OFFSET 0
65
#define PMULHRW(x, y, s, o) \
66
"pmulhrw " #s ", " #x " \n\t" \
67
"pmulhrw " #s ", " #y " \n\t"
68
69
#include "
mpegvideoenc_qns_template.c
"
70
71
#undef DEF
72
#undef SET_RND
73
#undef SCALE_OFFSET
74
#undef PMULHRW
75
76
#if HAVE_SSSE3_INLINE
/*
 * SSSE3 instantiation of the template.  The earlier PHADDD definition is
 * replaced by a pshufw-based one, and PMULHRW maps directly to pmulhrsw
 * (offset argument ignored, SET_RND empty, SCALE_OFFSET -1).
 */
#undef PHADDD

#define SCALE_OFFSET -1
#define SET_RND(x)
#define DEF(x) x ## _ssse3

/* Sum of the two dwords of a, left in the low dword of a; t is scratch. */
#define PHADDD(a, t)                                \
    "pshufw $0x0E, " #a ", " #t " \n\t"             \
    /* faster than phaddd on core2 */               \
    "paddd " #t ", " #a " \n\t"

#define PMULHRW(x, y, s, o)                         \
    "pmulhrsw " #s ", " #x " \n\t"                  \
    "pmulhrsw " #s ", " #y " \n\t"

/* Presumably provides try_8x8basis_ssse3 / add_8x8basis_ssse3, which the
 * init function in this file installs -- confirm against the template. */
#include "mpegvideoenc_qns_template.c"

/* Remove every macro this section defined, including PHADDD. */
#undef PHADDD
#undef PMULHRW
#undef SCALE_OFFSET
#undef SET_RND
#undef DEF
#endif /* HAVE_SSSE3_INLINE */
99
100
/* Draw the edges of width 'w' of an image of size width, height
101
* this MMX version can only handle w == 8 || w == 16. */
102
static
void
draw_edges_mmx(
uint8_t
*
buf
,
int
wrap
,
int
width
,
int
height
,
103
int
w,
int
h,
int
sides)
104
{
105
uint8_t
*ptr, *last_line;
106
int
i;
107
108
last_line = buf + (height - 1) * wrap;
109
/* left and right */
110
ptr =
buf
;
111
if
(w == 8) {
112
__asm__
volatile
(
113
"1: \n\t"
114
"movd (%0), %%mm0 \n\t"
115
"punpcklbw %%mm0, %%mm0 \n\t"
116
"punpcklwd %%mm0, %%mm0 \n\t"
117
"punpckldq %%mm0, %%mm0 \n\t"
118
"movq %%mm0, -8(%0) \n\t"
119
"movq -8(%0, %2), %%mm1 \n\t"
120
"punpckhbw %%mm1, %%mm1 \n\t"
121
"punpckhwd %%mm1, %%mm1 \n\t"
122
"punpckhdq %%mm1, %%mm1 \n\t"
123
"movq %%mm1, (%0, %2) \n\t"
124
"add %1, %0 \n\t"
125
"cmp %3, %0 \n\t"
126
"jb 1b \n\t"
127
:
"+r"
(ptr)
128
:
"r"
((
x86_reg
)
wrap
),
"r"
((
x86_reg
)
width
),
129
"r"
(ptr + wrap * height));
130
}
else
if
(w == 16) {
131
__asm__
volatile
(
132
"1: \n\t"
133
"movd (%0), %%mm0 \n\t"
134
"punpcklbw %%mm0, %%mm0 \n\t"
135
"punpcklwd %%mm0, %%mm0 \n\t"
136
"punpckldq %%mm0, %%mm0 \n\t"
137
"movq %%mm0, -8(%0) \n\t"
138
"movq %%mm0, -16(%0) \n\t"
139
"movq -8(%0, %2), %%mm1 \n\t"
140
"punpckhbw %%mm1, %%mm1 \n\t"
141
"punpckhwd %%mm1, %%mm1 \n\t"
142
"punpckhdq %%mm1, %%mm1 \n\t"
143
"movq %%mm1, (%0, %2) \n\t"
144
"movq %%mm1, 8(%0, %2) \n\t"
145
"add %1, %0 \n\t"
146
"cmp %3, %0 \n\t"
147
"jb 1b \n\t"
148
:
"+r"
(ptr)
149
:
"r"
((
x86_reg
)
wrap
),
"r"
((
x86_reg
)
width
),
"r"
(ptr + wrap * height)
150
);
151
}
else
{
152
av_assert1
(w == 4);
153
__asm__
volatile
(
154
"1: \n\t"
155
"movd (%0), %%mm0 \n\t"
156
"punpcklbw %%mm0, %%mm0 \n\t"
157
"punpcklwd %%mm0, %%mm0 \n\t"
158
"movd %%mm0, -4(%0) \n\t"
159
"movd -4(%0, %2), %%mm1 \n\t"
160
"punpcklbw %%mm1, %%mm1 \n\t"
161
"punpckhwd %%mm1, %%mm1 \n\t"
162
"punpckhdq %%mm1, %%mm1 \n\t"
163
"movd %%mm1, (%0, %2) \n\t"
164
"add %1, %0 \n\t"
165
"cmp %3, %0 \n\t"
166
"jb 1b \n\t"
167
:
"+r"
(ptr)
168
:
"r"
((
x86_reg
)
wrap
),
"r"
((
x86_reg
)
width
),
169
"r"
(ptr + wrap * height));
170
}
171
172
/* top and bottom (and hopefully also the corners) */
173
if
(sides &
EDGE_TOP
) {
174
for
(i = 0; i < h; i += 4) {
175
ptr = buf - (i + 1) * wrap - w;
176
__asm__
volatile
(
177
"1: \n\t"
178
"movq (%1, %0), %%mm0 \n\t"
179
"movq %%mm0, (%0) \n\t"
180
"movq %%mm0, (%0, %2) \n\t"
181
"movq %%mm0, (%0, %2, 2) \n\t"
182
"movq %%mm0, (%0, %3) \n\t"
183
"add $8, %0 \n\t"
184
"cmp %4, %0 \n\t"
185
"jb 1b \n\t"
186
:
"+r"
(ptr)
187
:
"r"
((
x86_reg
) buf - (
x86_reg
) ptr - w),
188
"r"
((
x86_reg
) - wrap),
"r"
((
x86_reg
) - wrap * 3),
189
"r"
(ptr + width + 2 * w));
190
}
191
}
192
193
if
(sides &
EDGE_BOTTOM
) {
194
for
(i = 0; i < h; i += 4) {
195
ptr = last_line + (i + 1) * wrap - w;
196
__asm__
volatile
(
197
"1: \n\t"
198
"movq (%1, %0), %%mm0 \n\t"
199
"movq %%mm0, (%0) \n\t"
200
"movq %%mm0, (%0, %2) \n\t"
201
"movq %%mm0, (%0, %2, 2) \n\t"
202
"movq %%mm0, (%0, %3) \n\t"
203
"add $8, %0 \n\t"
204
"cmp %4, %0 \n\t"
205
"jb 1b \n\t"
206
:
"+r"
(ptr)
207
:
"r"
((
x86_reg
) last_line - (
x86_reg
) ptr - w),
208
"r"
((
x86_reg
) wrap),
"r"
((
x86_reg
) wrap * 3),
209
"r"
(ptr + width + 2 * w));
210
}
211
}
212
}
213
214
#endif
/* HAVE_INLINE_ASM */
215
216
av_cold
void
ff_mpegvideoencdsp_init_x86
(
MpegvideoEncDSPContext
*
c
,
217
AVCodecContext
*avctx)
218
{
219
int
cpu_flags
=
av_get_cpu_flags
();
220
221
if
(
EXTERNAL_MMX
(cpu_flags)) {
222
c->
pix_sum
=
ff_pix_sum16_mmx
;
223
c->
pix_norm1
=
ff_pix_norm1_mmx
;
224
}
225
226
if
(
EXTERNAL_SSE2
(cpu_flags)) {
227
c->
pix_sum
=
ff_pix_sum16_sse2
;
228
c->
pix_norm1
=
ff_pix_norm1_sse2
;
229
}
230
231
if
(
EXTERNAL_XOP
(cpu_flags)) {
232
c->
pix_sum
=
ff_pix_sum16_xop
;
233
}
234
235
#if HAVE_INLINE_ASM
236
237
if
(
INLINE_MMX
(cpu_flags)) {
238
if
(!(avctx->
flags
&
CODEC_FLAG_BITEXACT
)) {
239
c->
try_8x8basis
= try_8x8basis_mmx;
240
}
241
c->
add_8x8basis
= add_8x8basis_mmx;
242
243
if
(avctx->
bits_per_raw_sample
<= 8) {
244
c->
draw_edges
= draw_edges_mmx;
245
}
246
}
247
248
if
(
INLINE_AMD3DNOW
(cpu_flags)) {
249
if
(!(avctx->
flags
&
CODEC_FLAG_BITEXACT
)) {
250
c->
try_8x8basis
= try_8x8basis_3dnow;
251
}
252
c->
add_8x8basis
= add_8x8basis_3dnow;
253
}
254
255
#if HAVE_SSSE3_INLINE
256
if
(
INLINE_SSSE3
(cpu_flags)) {
257
if
(!(avctx->
flags
&
CODEC_FLAG_BITEXACT
)) {
258
c->
try_8x8basis
= try_8x8basis_ssse3;
259
}
260
c->
add_8x8basis
= add_8x8basis_ssse3;
261
}
262
#endif
/* HAVE_SSSE3_INLINE */
263
264
#endif
/* HAVE_INLINE_ASM */
265
}
Generated on Sun Jul 20 2014 23:05:58 for FFmpeg by
1.8.2