FFmpeg
qpeldsp_init.c
Go to the documentation of this file.
1 /*
2  * quarterpel DSP functions
3  * Copyright (c) 2000, 2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 #include <stddef.h>
24 #include <stdint.h>
25 
26 #include "config.h"
27 #include "libavutil/attributes.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/mem_internal.h"
31 #include "libavutil/x86/cpu.h"
32 #include "libavcodec/qpeldsp.h"
33 #include "fpel.h"
34 #include "qpel.h"
35 
38  const uint8_t *src1, const uint8_t *src2,
39  ptrdiff_t dstStride, ptrdiff_t src1Stride);
41  const uint8_t *src1, const uint8_t *src2,
42  ptrdiff_t dstStride, ptrdiff_t src1Stride);
43 
/*
 * QPEL_H: horizontal-only quarter-pel wrappers.
 *
 * Declares the two external horizontal filters
 *   ff_mpeg4_<OPNAME>_qpel<SIZE>_h_lowpass_<XMM>(dst, src, dstStride, srcStride, h)
 *   ff_mpeg4_<OPNAME>_qpel<SIZE>_h_lowpass_l2_<XMM>(..., h, l2_offset)
 * and defines three static functions taking (dst, src, stride):
 *   <OPNAME>_qpel<SIZE>_mc10_<XMM>  -> _h_lowpass_l2_ with l2_offset = 0
 *   <OPNAME>_qpel<SIZE>_mc20_<XMM>  -> _h_lowpass_
 *   <OPNAME>_qpel<SIZE>_mc30_<XMM>  -> _h_lowpass_l2_ with l2_offset = 1
 * Each wrapper passes `stride` as both dstStride and srcStride and SIZE as h.
 *
 * RND, UNUSED1, UNUSED2, UNUSED3 and L2 are not referenced in the expansion;
 * they are accepted so that QPEL3 can invoke QPEL_H, QPEL_V and QPEL_HV with
 * one common argument list.
 */
44 #define QPEL_H(OPNAME, RND, SIZE, UNUSED1, XMM, UNUSED2, UNUSED3, L2) \
45 void ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _h_lowpass_ ## XMM (uint8_t *dst, \
46  const uint8_t *src, \
47  ptrdiff_t dstStride, \
48  ptrdiff_t srcStride, \
49  int h); \
50 void ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _h_lowpass_l2_ ## XMM(uint8_t *dst, \
51  const uint8_t *src, \
52  ptrdiff_t dstStride, \
53  ptrdiff_t srcStride, \
54  int h, \
55  ptrdiff_t l2_offset);\
56 static void OPNAME ## _qpel ## SIZE ## _mc10_ ## XMM(uint8_t *dst, \
57  const uint8_t *src, \
58  ptrdiff_t stride) \
59 { \
60  ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _h_lowpass_l2_ ## XMM(dst, src, stride, \
61  stride, SIZE, 0); \
62 } \
63  \
64 static void OPNAME ## _qpel ## SIZE ## _mc20_ ## XMM(uint8_t *dst, \
65  const uint8_t *src, \
66  ptrdiff_t stride) \
67 { \
68  ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _h_lowpass_ ## XMM(dst, src, stride, \
69  stride, SIZE); \
70 } \
71  \
72 static void OPNAME ## _qpel ## SIZE ## _mc30_ ## XMM(uint8_t *dst, \
73  const uint8_t *src, \
74  ptrdiff_t stride) \
75 { \
76  ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _h_lowpass_l2_ ## XMM(dst, src, stride, \
77  stride, SIZE, 1); \
78 }
79 
/*
 * QPEL_V: vertical-only quarter-pel wrappers.
 *
 * Declares the external vertical filter
 *   ff_mpeg4_<OPNAME>_qpel<SIZE>_v_lowpass_<XMM>(dst, src, dstStride, srcStride)
 * and defines three static functions taking (dst, src, stride):
 *   <OPNAME>_qpel<SIZE>_mc01_<XMM>: runs the put_<RND> vertical filter from src
 *       into a SIZE*SIZE temporary `half` (row stride SIZE), then calls
 *       ff_<OPNAME>_pixels<SIZE>x<SIZE>_l2_<L2>(dst, src, half, stride, stride).
 *   <OPNAME>_qpel<SIZE>_mc02_<XMM>: runs the <OPNAME> vertical filter straight
 *       from src into dst.
 *   <OPNAME>_qpel<SIZE>_mc03_<XMM>: same as mc01, but the first source handed
 *       to the _l2_ helper is src + stride instead of src.
 *
 * The temporary is always produced with the put_<RND> filter, not the <OPNAME>
 * one, so this macro relies on the put / put_no_rnd expansion having declared
 * that function earlier (QPEL3 expands `put` first).
 * NOTE(review): the _l2_ helpers presumably average their two inputs and read
 * `half` with a fixed row stride of SIZE; confirm against their definition.
 *
 * UNUSED1, UNUSED2 and UNUSED3 are not referenced in the expansion.
 */
80 #define QPEL_V(OPNAME, RND, SIZE, UNUSED1, UNUSED2, XMM, UNUSED3, L2) \
81 void ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _v_lowpass_ ## XMM (uint8_t *dst, \
82  const uint8_t *src, \
83  ptrdiff_t dstStride, \
84  ptrdiff_t srcStride); \
85 static void OPNAME ## _qpel ## SIZE ## _mc01_ ## XMM(uint8_t *dst, \
86  const uint8_t *src, \
87  ptrdiff_t stride) \
88 { \
89  DECLARE_ALIGNED(SIZE, uint8_t, half)[SIZE*SIZE]; \
90  ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _v_lowpass_ ## XMM(half, src, \
91  SIZE, stride); \
92  ff_ ## OPNAME ## _pixels ## SIZE ## x ## SIZE ## _l2_ ## L2(dst, src, half, \
93  stride, stride); \
94 } \
95  \
96 static void OPNAME ## _qpel ## SIZE ## _mc02_ ## XMM(uint8_t *dst, \
97  const uint8_t *src, \
98  ptrdiff_t stride) \
99 { \
100  ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _v_lowpass_ ## XMM(dst, src, \
101  stride, stride); \
102 } \
103  \
104 static void OPNAME ## _qpel ## SIZE ## _mc03_ ## XMM(uint8_t *dst, \
105  const uint8_t *src, \
106  ptrdiff_t stride) \
107 { \
108  DECLARE_ALIGNED(SIZE, uint8_t, half)[SIZE*SIZE]; \
109  ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _v_lowpass_ ## XMM(half, src, \
110  SIZE, stride); \
111  ff_ ## OPNAME ## _pixels ## SIZE ## x ## SIZE ## _l2_ ## L2(dst, src + stride, \
112  half, stride, stride); \
113 }
114 
/*
 * QPEL_HV: combined horizontal + vertical quarter-pel wrappers.
 *
 * Defines nine static functions <OPNAME>_qpel<SIZE>_mcXY_<HVXMM>(dst, src,
 * stride) for X, Y in {1, 2, 3}. The macro declares nothing itself: it calls
 * the h_lowpass functions declared by QPEL_H (suffix HXMM), the v_lowpass
 * functions declared by QPEL_V (suffix VXMM) and the _l2_ helpers (suffix L2),
 * so it has to be expanded after those declarations.
 *
 * Common first step: a put_<RND> horizontal filter writes SIZEP1 rows of src
 * into the temporary halfH with a row stride of SIZE. X selects the filter:
 *   X = 1 -> _h_lowpass_l2_ with l2_offset 0
 *   X = 2 -> _h_lowpass_
 *   X = 3 -> _h_lowpass_l2_ with l2_offset 1
 *
 * Second step, selected by Y:
 *   Y = 2 (mc12, mc32, mc22): the <OPNAME> vertical filter runs from halfH
 *       directly into dst; only a SIZEP1*SIZE buffer is needed.
 *   Y = 1 or 3 (mc11, mc31, mc21, mc13, mc33, mc23): one buffer `half` of
 *       (SIZE + SIZEP1)*SIZE bytes is split into halfHV (first SIZE*SIZE
 *       bytes) and halfH (the remainder). The put_<RND> vertical filter writes
 *       halfHV from halfH, then ff_<OPNAME>_pixels<SIZE>x<SIZE>_l2_<L2> is
 *       called with halfHV and either halfH (Y = 1) or halfH + SIZE (Y = 3).
 */
115 #define QPEL_HV(OPNAME, RND, SIZE, SIZEP1, HXMM, VXMM, HVXMM, L2) \
116 static void OPNAME ## _qpel ## SIZE ## _mc11_ ## HVXMM(uint8_t *dst, \
117  const uint8_t *src, \
118  ptrdiff_t stride) \
119 { \
120  DECLARE_ALIGNED(SIZE, uint8_t, half)[(SIZE + SIZEP1)*SIZE]; \
121  uint8_t *const halfH = half + SIZE*SIZE; \
122  uint8_t *const halfHV = half; \
123  ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _h_lowpass_l2_ ## HXMM(halfH, src, SIZE, \
124  stride, SIZEP1, 0); \
125  ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _v_lowpass_ ## VXMM(halfHV, halfH, \
126  SIZE, SIZE); \
127  ff_ ## OPNAME ## _pixels ## SIZE ## x ## SIZE ## _l2_ ## L2(dst, halfH, halfHV, \
128  stride, SIZE); \
129 } \
130  \
131 static void OPNAME ## _qpel ## SIZE ## _mc31_ ## HVXMM(uint8_t *dst, \
132  const uint8_t *src, \
133  ptrdiff_t stride) \
134 { \
135  DECLARE_ALIGNED(SIZE, uint8_t, half)[(SIZE + SIZEP1)*SIZE]; \
136  uint8_t *const halfH = half + SIZE*SIZE; \
137  uint8_t *const halfHV = half; \
138  ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _h_lowpass_l2_ ## HXMM(halfH, src, SIZE, \
139  stride, SIZEP1, 1); \
140  ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _v_lowpass_ ## VXMM(halfHV, halfH, \
141  SIZE, SIZE); \
142  ff_ ## OPNAME ## _pixels ## SIZE ## x ## SIZE ## _l2_ ## L2(dst, halfH, halfHV, \
143  stride, SIZE); \
144 } \
145  \
146 static void OPNAME ## _qpel ## SIZE ## _mc13_ ## HVXMM(uint8_t *dst, \
147  const uint8_t *src, \
148  ptrdiff_t stride) \
149 { \
150  DECLARE_ALIGNED(SIZE, uint8_t, half)[(SIZE + SIZEP1)*SIZE]; \
151  uint8_t *const halfH = half + SIZE*SIZE; \
152  uint8_t *const halfHV = half; \
153  ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _h_lowpass_l2_ ## HXMM(halfH, src, SIZE, \
154  stride, SIZEP1, 0); \
155  ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _v_lowpass_ ## VXMM(halfHV, halfH, \
156  SIZE, SIZE); \
157  ff_ ## OPNAME ## _pixels ## SIZE ## x ## SIZE ## _l2_ ## L2(dst, halfH + SIZE, \
158  halfHV, stride, SIZE); \
159 } \
160  \
161 static void OPNAME ## _qpel ## SIZE ## _mc33_ ## HVXMM(uint8_t *dst, \
162  const uint8_t *src, \
163  ptrdiff_t stride) \
164 { \
165  DECLARE_ALIGNED(SIZE, uint8_t, half)[(SIZE + SIZEP1)*SIZE]; \
166  uint8_t *const halfH = half + SIZE*SIZE; \
167  uint8_t *const halfHV = half; \
168  ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _h_lowpass_l2_ ## HXMM(halfH, src, SIZE, \
169  stride, SIZEP1, 1); \
170  ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _v_lowpass_ ## VXMM(halfHV, halfH, \
171  SIZE, SIZE); \
172  ff_ ## OPNAME ## _pixels ## SIZE ## x ## SIZE ## _l2_ ## L2(dst, halfH + SIZE, \
173  halfHV, stride, SIZE); \
174 } \
175  \
176 static void OPNAME ## _qpel ## SIZE ## _mc21_ ## HVXMM(uint8_t *dst, \
177  const uint8_t *src, \
178  ptrdiff_t stride) \
179 { \
180  DECLARE_ALIGNED(SIZE, uint8_t, half)[(SIZE + SIZEP1)*SIZE]; \
181  uint8_t *const halfH = half + SIZE*SIZE; \
182  uint8_t *const halfHV = half; \
183  ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _h_lowpass_ ## HXMM(halfH, src, SIZE, \
184  stride, SIZEP1); \
185  ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _v_lowpass_ ## VXMM(halfHV, halfH, \
186  SIZE, SIZE); \
187  ff_ ## OPNAME ## _pixels ## SIZE ## x ## SIZE ## _l2_ ## L2(dst, halfH, halfHV, \
188  stride, SIZE); \
189 } \
190  \
191 static void OPNAME ## _qpel ## SIZE ## _mc23_ ## HVXMM(uint8_t *dst, \
192  const uint8_t *src, \
193  ptrdiff_t stride) \
194 { \
195  DECLARE_ALIGNED(SIZE, uint8_t, half)[(SIZE + SIZEP1)*SIZE]; \
196  uint8_t *const halfH = half + SIZE*SIZE; \
197  uint8_t *const halfHV = half; \
198  ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _h_lowpass_ ## HXMM(halfH, src, SIZE, \
199  stride, SIZEP1); \
200  ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _v_lowpass_ ## VXMM(halfHV, halfH, \
201  SIZE, SIZE); \
202  ff_ ## OPNAME ## _pixels ## SIZE ## x ## SIZE ## _l2_ ## L2(dst, halfH + SIZE, \
203  halfHV, stride, SIZE); \
204 } \
205  \
206 static void OPNAME ## _qpel ## SIZE ## _mc12_ ## HVXMM(uint8_t *dst, \
207  const uint8_t *src, \
208  ptrdiff_t stride) \
209 { \
210  DECLARE_ALIGNED(SIZE, uint8_t, halfH)[SIZEP1*SIZE]; \
211  ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _h_lowpass_l2_ ## HXMM(halfH, src, SIZE, \
212  stride, SIZEP1, 0); \
213  ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _v_lowpass_ ## VXMM(dst, halfH, \
214  stride, SIZE); \
215 } \
216  \
217 static void OPNAME ## _qpel ## SIZE ## _mc32_ ## HVXMM(uint8_t *dst, \
218  const uint8_t *src, \
219  ptrdiff_t stride) \
220 { \
221  DECLARE_ALIGNED(SIZE, uint8_t, halfH)[SIZEP1*SIZE]; \
222  ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _h_lowpass_l2_ ## HXMM(halfH, src, SIZE, \
223  stride, SIZEP1, 1); \
224  ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _v_lowpass_ ## VXMM(dst, halfH, \
225  stride, SIZE); \
226 } \
227  \
228 static void OPNAME ## _qpel ## SIZE ## _mc22_ ## HVXMM(uint8_t *dst, \
229  const uint8_t *src, \
230  ptrdiff_t stride) \
231 { \
232  DECLARE_ALIGNED(SIZE, uint8_t, halfH)[SIZEP1*SIZE]; \
233  ff_mpeg4_put_ ## RND ## qpel ## SIZE ## _h_lowpass_ ## HXMM(halfH, src, SIZE, \
234  stride, SIZEP1); \
235  ff_mpeg4_ ## OPNAME ## _qpel ## SIZE ## _v_lowpass_ ## VXMM(dst, halfH, \
236  stride, SIZE); \
237 }
238 
/*
 * QPEL3: expands MACRO (QPEL_H, QPEL_V or QPEL_HV) once per operation:
 *   OPNAME = put         with an empty RND
 *   OPNAME = avg         with an empty RND
 *   OPNAME = put_no_rnd  with RND = no_rnd_
 * The remaining arguments are forwarded unchanged. Because RND is empty for
 * avg, the avg wrappers build their intermediates with the same put filters
 * as the put wrappers.
 */
239 #define QPEL3(MACRO, SIZE, SIZEP1, HXMM, VXMM, HVXMM, L2) \
240 MACRO(put,, SIZE, SIZEP1, HXMM, VXMM, HVXMM, L2) \
241 MACRO(avg,, SIZE, SIZEP1, HXMM, VXMM, HVXMM, L2) \
242 MACRO(put_no_rnd, no_rnd_, SIZE, SIZEP1, HXMM, VXMM, HVXMM, L2)
243 
/*
 * Instantiate all wrappers for 8x8 and 16x16 blocks.
 * Horizontal filters and the HV wrappers carry the ssse3 suffix, vertical
 * filters the sse2 suffix; the _l2_ helper suffix is mmxext for size 8 and
 * sse2 for size 16.
 * Order matters: QPEL_HV only calls functions, so for each size the QPEL_H
 * and QPEL_V expansions that declare them must come first.
 */
244 QPEL3(QPEL_H, 8, 9, ssse3, sse2, ssse3, mmxext)
245 QPEL3(QPEL_H, 16, 17, ssse3, sse2, ssse3, sse2)
246 QPEL3(QPEL_V, 8, 9, ssse3, sse2, ssse3, mmxext)
247 QPEL3(QPEL_HV, 8, 9, ssse3, sse2, ssse3, mmxext)
248 QPEL3(QPEL_V, 16, 17, ssse3, sse2, ssse3, sse2)
249 QPEL3(QPEL_HV, 16, 17, ssse3, sse2, ssse3, sse2)
250 
/*
 * SET_QPEL_FUNC: store one wrapper in the context pointed to by `c`, which
 * must be in scope at the expansion site.
 * Target slot: c-><OP>_qpel_pixels_tab[SIZE == 8][X + 4*Y], i.e. first index
 * 1 for SIZE 8 and 0 otherwise, second index built from the mcXY position.
 * Stored function: <PREFIX><OP>_qpel<SIZE>_mc<X><Y>_<CPU>.
 * X and Y are also token-pasted, so they must be single literal digits.
 */
251 #define SET_QPEL_FUNC(OP, X, Y, SIZE, CPU, PREFIX) \
252  c->OP ## _qpel_pixels_tab[SIZE == 8][X+4*Y] = PREFIX ## OP ## _qpel ## SIZE ## _mc ## X ## Y ## _ ## CPU
253 
/*
 * SET_QPEL_FUNCS3: apply SET_QPEL_FUNC for the avg, put and put_no_rnd
 * tables at one (X, Y) position. The last statement has no trailing
 * semicolon; the caller supplies it.
 */
254 #define SET_QPEL_FUNCS3(X, Y, SIZE, CPU, PREFIX) \
255  SET_QPEL_FUNC(avg, X, Y, SIZE, CPU, PREFIX); \
256  SET_QPEL_FUNC(put, X, Y, SIZE, CPU, PREFIX); \
257  SET_QPEL_FUNC(put_no_rnd, X, Y, SIZE, CPU, PREFIX)
258 
/*
 * SET_H_QPEL_FUNCS: install the horizontal-only positions mc10, mc20 and
 * mc30 (Y = 0) for all three operations.
 */
259 #define SET_H_QPEL_FUNCS(SIZE, CPU, PREFIX) \
260  SET_QPEL_FUNCS3(1, 0, SIZE, CPU, PREFIX); \
261  SET_QPEL_FUNCS3(2, 0, SIZE, CPU, PREFIX); \
262  SET_QPEL_FUNCS3(3, 0, SIZE, CPU, PREFIX)
263 
/*
 * SET_V_QPEL_FUNCS: install the vertical-only positions mc01, mc02 and
 * mc03 (X = 0) for all three operations.
 */
264 #define SET_V_QPEL_FUNCS(SIZE, CPU, PREFIX) \
265  SET_QPEL_FUNCS3(0, 1, SIZE, CPU, PREFIX); \
266  SET_QPEL_FUNCS3(0, 2, SIZE, CPU, PREFIX); \
267  SET_QPEL_FUNCS3(0, 3, SIZE, CPU, PREFIX)
268 
/*
 * SET_HV_QPEL_FUNCS: install the nine positions with both X and Y in
 * {1, 2, 3} (mc11 .. mc33) for all three operations.
 */
269 #define SET_HV_QPEL_FUNCS(SIZE, CPU, PREFIX) \
270  SET_QPEL_FUNCS3(1, 1, SIZE, CPU, PREFIX); \
271  SET_QPEL_FUNCS3(1, 2, SIZE, CPU, PREFIX); \
272  SET_QPEL_FUNCS3(1, 3, SIZE, CPU, PREFIX); \
273  SET_QPEL_FUNCS3(2, 1, SIZE, CPU, PREFIX); \
274  SET_QPEL_FUNCS3(2, 2, SIZE, CPU, PREFIX); \
275  SET_QPEL_FUNCS3(2, 3, SIZE, CPU, PREFIX); \
276  SET_QPEL_FUNCS3(3, 1, SIZE, CPU, PREFIX); \
277  SET_QPEL_FUNCS3(3, 2, SIZE, CPU, PREFIX); \
278  SET_QPEL_FUNCS3(3, 3, SIZE, CPU, PREFIX)
279 
/*
 * Body of ff_qpeldsp_init_x86(): fills the quarter-pel function tables of
 * `c` according to the CPU flags reported by av_get_cpu_flags(). Slots not
 * assigned here are left untouched.
 */
281 {
282  int cpu_flags = av_get_cpu_flags();
283 
284  if (X86_MMXEXT(cpu_flags)) {
285 #if HAVE_MMXEXT_EXTERNAL
/* 8x8 avg, position 0 (X = 0, Y = 0 in SET_QPEL_FUNC's indexing). */
286  c->avg_qpel_pixels_tab[1][0] = ff_avg_pixels8x8_mmxext;
287 #endif /* HAVE_MMXEXT_EXTERNAL */
288  }
289 #if HAVE_SSE2_EXTERNAL
290  if (EXTERNAL_SSE2(cpu_flags)) {
/* Position 0: put and put_no_rnd share the same function per block size. */
291  c->put_no_rnd_qpel_pixels_tab[0][0] =
292  c->put_qpel_pixels_tab[0][0] = ff_put_pixels16x16_sse2;
293  c->put_no_rnd_qpel_pixels_tab[1][0] =
294  c->put_qpel_pixels_tab[1][0] = ff_put_pixels8x8_sse2;
295  c->avg_qpel_pixels_tab[0][0] = ff_avg_pixels16x16_sse2;
296 
/* Vertical-only positions mc01..mc03 for both block sizes. */
297  SET_V_QPEL_FUNCS (16, sse2,);
298  SET_V_QPEL_FUNCS (8, sse2,);
299  }
300 #endif
/*
 * Horizontal-only and combined positions for both block sizes.
 * NOTE(review): unlike the SSE2 branch this one has no #if guard; presumably
 * EXTERNAL_SSSE3() is a compile-time false when SSSE3 assembly is disabled,
 * confirm against libavutil/x86/cpu.h.
 */
301  if (EXTERNAL_SSSE3(cpu_flags)) {
302  SET_H_QPEL_FUNCS(8, ssse3,);
303  SET_HV_QPEL_FUNCS(8, ssse3,);
304  SET_H_QPEL_FUNCS(16, ssse3,);
305  SET_HV_QPEL_FUNCS(16, ssse3,);
306  }
307 }
SET_V_QPEL_FUNCS
#define SET_V_QPEL_FUNCS(SIZE, CPU, PREFIX)
Definition: qpeldsp_init.c:264
FF_VISIBILITY_PUSH_HIDDEN
#define FF_VISIBILITY_PUSH_HIDDEN
Definition: attributes_internal.h:30
cpu.h
SET_H_QPEL_FUNCS
#define SET_H_QPEL_FUNCS(SIZE, CPU, PREFIX)
Definition: qpeldsp_init.c:259
mem_internal.h
src1
const pixel * src1
Definition: h264pred_template.c:420
qpel.h
ff_avg_pixels16x16_sse2
void ff_avg_pixels16x16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size)
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:109
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:56
SET_HV_QPEL_FUNCS
#define SET_HV_QPEL_FUNCS(SIZE, CPU, PREFIX)
Definition: qpeldsp_init.c:269
ff_put_no_rnd_pixels16x16_l2_sse2
void ff_put_no_rnd_pixels16x16_l2_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src1Stride)
av_cold
#define av_cold
Definition: attributes.h:119
attributes_internal.h
QPEL_H
#define QPEL_H(OPNAME, RND, SIZE, UNUSED1, XMM, UNUSED2, UNUSED3, L2)
Definition: qpeldsp_init.c:44
QPEL_V
#define QPEL_V(OPNAME, RND, SIZE, UNUSED1, UNUSED2, XMM, UNUSED3, L2)
Definition: qpeldsp_init.c:80
ff_put_pixels16x16_sse2
void ff_put_pixels16x16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size)
qpeldsp.h
ff_put_pixels8x8_sse2
void ff_put_pixels8x8_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size)
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
cpu.h
ff_qpeldsp_init_x86
av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c)
Definition: qpeldsp_init.c:280
QPEL3
#define QPEL3(MACRO, SIZE, SIZEP1, HXMM, VXMM, HVXMM, L2)
Definition: qpeldsp_init.c:239
attributes.h
EXTERNAL_SSE2
#define EXTERNAL_SSE2(flags)
Definition: cpu.h:53
FF_VISIBILITY_POP_HIDDEN
#define FF_VISIBILITY_POP_HIDDEN
Definition: attributes_internal.h:31
ff_put_no_rnd_pixels8x8_l2_mmxext
FF_VISIBILITY_PUSH_HIDDEN void ff_put_no_rnd_pixels8x8_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src1Stride)
src2
const pixel * src2
Definition: h264pred_template.c:421
ff_avg_pixels8x8_mmxext
void ff_avg_pixels8x8_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size)
QpelDSPContext
quarterpel DSP context
Definition: qpeldsp.h:72
QPEL_HV
#define QPEL_HV(OPNAME, RND, SIZE, SIZEP1, HXMM, VXMM, HVXMM, L2)
Definition: qpeldsp_init.c:115
fpel.h
EXTERNAL_SSSE3
#define EXTERNAL_SSSE3(flags)
Definition: cpu.h:59
X86_MMXEXT
#define X86_MMXEXT(flags)
Definition: cpu.h:26