FFmpeg
h264dsp_init.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/attributes.h"
22 #include "libavutil/cpu.h"
23 #include "libavutil/x86/asm.h"
24 #include "libavutil/x86/cpu.h"
25 #include "libavcodec/h264dsp.h"
26 
27 /***********************************/
28 /* IDCT */
/*
 * Declares the prototype of one single-block IDCT-and-add routine.
 * The symbol name is pasted together as ff_h264_idct<NUM>_add_<DEPTH>_<OPT>,
 * e.g. IDCT_ADD_FUNC(8_dc, 10, sse2) declares ff_h264_idct8_dc_add_10_sse2.
 * The definitions are not in this file (presumably external x86 assembly --
 * confirm against the build).
 */
29 #define IDCT_ADD_FUNC(NUM, DEPTH, OPT) \
30 void ff_h264_idct ## NUM ## _add_ ## DEPTH ## _ ## OPT(uint8_t *dst, \
31  int16_t *block, \
32  int stride);
33 
/* One declaration per (transform variant, bit depth, instruction set). */
34 IDCT_ADD_FUNC(, 8, mmx)
35 IDCT_ADD_FUNC(, 8, sse2)
36 IDCT_ADD_FUNC(, 8, avx)
37 IDCT_ADD_FUNC(, 10, sse2)
38 IDCT_ADD_FUNC(_dc, 8, mmxext)
39 IDCT_ADD_FUNC(_dc, 8, sse2)
40 IDCT_ADD_FUNC(_dc, 8, avx)
41 IDCT_ADD_FUNC(_dc, 10, mmxext)
42 IDCT_ADD_FUNC(8_dc, 8, mmxext)
43 IDCT_ADD_FUNC(8_dc, 10, sse2)
44 IDCT_ADD_FUNC(8, 8, mmx)
45 IDCT_ADD_FUNC(8, 8, sse2)
46 IDCT_ADD_FUNC(8, 10, sse2)
47 IDCT_ADD_FUNC(, 10, avx)
48 IDCT_ADD_FUNC(8_dc, 10, avx)
49 IDCT_ADD_FUNC(8, 10, avx)
50 
51 
/*
 * Declares the prototype of a multi-block IDCT-and-add routine named
 * ff_h264_idct<NUM>_add<REP>_<DEPTH>_<OPT>. These variants take a single
 * destination pointer plus a block_offset table and a 6 * 8 entry nnzc array
 * (presumably the non-zero coefficient counts -- confirm against h264dsp.h).
 */
52 #define IDCT_ADD_REP_FUNC(NUM, REP, DEPTH, OPT) \
53 void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \
54  (uint8_t *dst, const int *block_offset, \
55  int16_t *block, int stride, const uint8_t nnzc[6 * 8]);
56 
/* add4 (idct8), add16 and add16intra variants for each supported ISA. */
57 IDCT_ADD_REP_FUNC(8, 4, 8, mmx)
58 IDCT_ADD_REP_FUNC(8, 4, 8, mmxext)
59 IDCT_ADD_REP_FUNC(8, 4, 8, sse2)
60 IDCT_ADD_REP_FUNC(8, 4, 10, sse2)
61 IDCT_ADD_REP_FUNC(8, 4, 10, avx)
62 IDCT_ADD_REP_FUNC(, 16, 8, mmx)
63 IDCT_ADD_REP_FUNC(, 16, 8, mmxext)
64 IDCT_ADD_REP_FUNC(, 16, 8, sse2)
65 IDCT_ADD_REP_FUNC(, 16, 10, sse2)
66 IDCT_ADD_REP_FUNC(, 16intra, 8, mmx)
67 IDCT_ADD_REP_FUNC(, 16intra, 8, mmxext)
68 IDCT_ADD_REP_FUNC(, 16intra, 8, sse2)
69 IDCT_ADD_REP_FUNC(, 16intra, 10, sse2)
70 IDCT_ADD_REP_FUNC(, 16, 10, avx)
71 IDCT_ADD_REP_FUNC(, 16intra, 10, avx)
72 
73 
/*
 * Same naming scheme as IDCT_ADD_REP_FUNC, but the declared routine takes
 * uint8_t **dst, i.e. an array of destination pointers rather than a single
 * one. Used here for the add8 and add8_422 variants (presumably one pointer
 * per chroma plane -- confirm against h264dsp.h).
 */
74 #define IDCT_ADD_REP_FUNC2(NUM, REP, DEPTH, OPT) \
75 void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \
76  (uint8_t **dst, const int *block_offset, \
77  int16_t *block, int stride, const uint8_t nnzc[6 * 8]);
78 
79 IDCT_ADD_REP_FUNC2(, 8, 8, mmx)
80 IDCT_ADD_REP_FUNC2(, 8, 8, mmxext)
81 IDCT_ADD_REP_FUNC2(, 8, 8, sse2)
82 IDCT_ADD_REP_FUNC2(, 8, 10, sse2)
83 IDCT_ADD_REP_FUNC2(, 8, 10, avx)
84 
/* The 8-bit 4:2:2 variant is only declared for MMX. */
85 IDCT_ADD_REP_FUNC2(, 8_422, 8, mmx)
86 
87 IDCT_ADD_REP_FUNC2(, 8_422, 10, sse2)
88 IDCT_ADD_REP_FUNC2(, 8_422, 10, avx)
89 
/*
 * Luma DC dequantisation + inverse transform, MMX and SSE2 versions.
 * Declared here, defined elsewhere; installed into
 * H264DSPContext.h264_luma_dc_dequant_idct by ff_h264dsp_init_x86().
 */
90 void ff_h264_luma_dc_dequant_idct_mmx(int16_t *output, int16_t *input, int qmul);
91 void ff_h264_luma_dc_dequant_idct_sse2(int16_t *output, int16_t *input, int qmul);
92 
93 /***********************************/
94 /* deblocking */
95 
/*
 * MMXEXT routine that fills the bS (boundary strength) table from the nnz,
 * ref and mv caches. Declared here, defined elsewhere; the init function
 * below installs it only when chroma_format_idc <= 1.
 */
96 void ff_h264_loop_filter_strength_mmxext(int16_t bS[2][4][4], uint8_t nnz[40],
97  int8_t ref[2][40],
98  int16_t mv[2][40][2],
99  int bidir, int edges, int step,
100  int mask_mv0, int mask_mv1, int field);
101 
/*
 * Declares a non-intra deblocking filter named
 * ff_deblock_<DIR>_<TYPE>_<DEPTH>_<OPT>; these take a tc0 table in addition
 * to the alpha/beta thresholds.
 */
102 #define LF_FUNC(DIR, TYPE, DEPTH, OPT) \
103 void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT(uint8_t *pix, \
104  ptrdiff_t stride, \
105  int alpha, \
106  int beta, \
107  int8_t *tc0);
/* Same naming scheme for the intra filters, which take no tc0 argument. */
108 #define LF_IFUNC(DIR, TYPE, DEPTH, OPT) \
109 void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT(uint8_t *pix, \
110  ptrdiff_t stride, \
111  int alpha, \
112  int beta);
113 
/*
 * Declares the whole family of luma/chroma, horizontal/vertical filters for
 * one bit depth across mmxext, sse2 and avx.
 * NOTE(review): the `type` parameter is never referenced in the expansion;
 * only `depth` affects the generated names. No v_luma mmxext prototype is
 * generated here.
 */
114 #define LF_FUNCS(type, depth) \
115 LF_FUNC(h, chroma, depth, mmxext) \
116 LF_IFUNC(h, chroma_intra, depth, mmxext) \
117 LF_FUNC(h, chroma422, depth, mmxext) \
118 LF_IFUNC(h, chroma422_intra, depth, mmxext) \
119 LF_FUNC(v, chroma, depth, mmxext) \
120 LF_IFUNC(v, chroma_intra, depth, mmxext) \
121 LF_FUNC(h, luma, depth, mmxext) \
122 LF_IFUNC(h, luma_intra, depth, mmxext) \
123 LF_FUNC(h, luma, depth, sse2) \
124 LF_IFUNC(h, luma_intra, depth, sse2) \
125 LF_FUNC(v, luma, depth, sse2) \
126 LF_IFUNC(v, luma_intra, depth, sse2) \
127 LF_FUNC(h, chroma, depth, sse2) \
128 LF_IFUNC(h, chroma_intra, depth, sse2) \
129 LF_FUNC(h, chroma422, depth, sse2) \
130 LF_IFUNC(h, chroma422_intra, depth, sse2) \
131 LF_FUNC(v, chroma, depth, sse2) \
132 LF_IFUNC(v, chroma_intra, depth, sse2) \
133 LF_FUNC(h, luma, depth, avx) \
134 LF_IFUNC(h, luma_intra, depth, avx) \
135 LF_FUNC(v, luma, depth, avx) \
136 LF_IFUNC(v, luma_intra, depth, avx) \
137 LF_FUNC(h, chroma, depth, avx) \
138 LF_IFUNC(h, chroma_intra, depth, avx) \
139 LF_FUNC(h, chroma422, depth, avx) \
140 LF_IFUNC(h, chroma422_intra, depth, avx) \
141 LF_FUNC(v, chroma, depth, avx) \
142 LF_IFUNC(v, chroma_intra, depth, avx)
143 
/* MBAFF horizontal luma filter: 8-bit only, not part of LF_FUNCS. */
144 LF_FUNC(h, luma_mbaff, 8, sse2)
145 LF_FUNC(h, luma_mbaff, 8, avx)
146 
/* Instantiate the family for 8-bit and 10-bit content. */
147 LF_FUNCS(uint8_t, 8)
148 LF_FUNCS(uint16_t, 10)
149 
150 #if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
151 LF_FUNC(v8, luma, 8, mmxext)
/**
 * Vertical luma deblocking filter (8-bit) built from two calls to the
 * 8-byte-wide MMXEXT routine, one for the half starting at pix and one for
 * the half starting at pix + 8.
 *
 * @param pix    first pixel of the edge to filter
 * @param stride line stride, forwarded unchanged; declared as ptrdiff_t to
 *               match the LF_FUNC prototypes this wrapper calls and the
 *               other filters installed in the same context fields
 * @param alpha  filter threshold, forwarded unchanged
 * @param beta   filter threshold, forwarded unchanged
 * @param tc0    four clipping values; entries 0-1 belong to the first half,
 *               entries 2-3 to the second
 */
static void deblock_v_luma_8_mmxext(uint8_t *pix, ptrdiff_t stride, int alpha,
                                    int beta, int8_t *tc0)
{
    /* The bitwise AND of two int8_t values is negative only when both are
     * negative, so a half is skipped only when both of its tc0 entries are
     * negative (presumably meaning "do not filter" -- confirm in h264dsp). */
    if ((tc0[0] & tc0[1]) >= 0) {
        ff_deblock_v8_luma_8_mmxext(pix + 0, stride, alpha, beta, tc0);
    }
    if ((tc0[2] & tc0[3]) >= 0) {
        ff_deblock_v8_luma_8_mmxext(pix + 8, stride, alpha, beta, tc0 + 2);
    }
}
160 LF_IFUNC(v8, luma_intra, 8, mmxext)
/**
 * Vertical intra luma deblocking filter (8-bit): applies the 8-byte-wide
 * MMXEXT routine to the two adjacent halves at pix and pix + 8.
 *
 * @param pix    first pixel of the edge to filter
 * @param stride line stride, forwarded unchanged; declared as ptrdiff_t to
 *               match the LF_IFUNC prototype this wrapper calls
 * @param alpha  filter threshold, forwarded unchanged
 * @param beta   filter threshold, forwarded unchanged
 */
static void deblock_v_luma_intra_8_mmxext(uint8_t *pix, ptrdiff_t stride,
                                          int alpha, int beta)
{
    ff_deblock_v8_luma_intra_8_mmxext(pix + 0, stride, alpha, beta);
    ff_deblock_v8_luma_intra_8_mmxext(pix + 8, stride, alpha, beta);
}
167 #endif /* ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL */
168 
/*
 * LF_FUNCS does not generate a v_luma mmxext prototype, so the 10-bit MMXEXT
 * vertical luma filters used by the init function are declared separately.
 */
169 LF_FUNC(v, luma, 10, mmxext)
170 LF_IFUNC(v, luma_intra, 10, mmxext)
171 
172 /***********************************/
173 /* weighted prediction */
174 
/*
 * Prototype generators for the 8-bit weighted-prediction routines.
 * W is pasted into the symbol name (16, 8 or 4 below; presumably the block
 * width in pixels) and OPT is the instruction-set suffix.
 */

/* Declares ff_h264_weight_<W>_<OPT> (explicit weighting of one block). */
#define H264_WEIGHT(W, OPT)                                             \
void ff_h264_weight_ ## W ## _ ## OPT(uint8_t *dst, ptrdiff_t stride,   \
                                      int height, int log2_denom,       \
                                      int weight, int offset);

/* Declares ff_h264_biweight_<W>_<OPT> (two-source weighting). */
#define H264_BIWEIGHT(W, OPT)                                           \
void ff_h264_biweight_ ## W ## _ ## OPT(uint8_t *dst, uint8_t *src,     \
                                        ptrdiff_t stride, int height,   \
                                        int log2_denom, int weightd,    \
                                        int weights, int offset);

/* MMXEXT weight + biweight pair for one width. */
#define H264_BIWEIGHT_MMX(W)                                            \
    H264_WEIGHT(W, mmxext)                                              \
    H264_BIWEIGHT(W, mmxext)

/* MMXEXT pair plus the SSE2 pair and the SSSE3 biweight for one width. */
#define H264_BIWEIGHT_MMX_SSE(W)                                        \
    H264_BIWEIGHT_MMX(W)                                                \
    H264_WEIGHT(W, sse2)                                                \
    H264_BIWEIGHT(W, sse2)                                              \
    H264_BIWEIGHT(W, ssse3)

/*
 * Instantiate the prototypes referenced by ff_h264dsp_init_x86(): widths 16
 * and 8 are used with mmxext, sse2 and ssse3; width 4 only with mmxext.
 */
H264_BIWEIGHT_MMX_SSE(16)
H264_BIWEIGHT_MMX_SSE(8)
H264_BIWEIGHT_MMX(4)
199 
/*
 * Prototype generators for the high-bit-depth weighted-prediction routines.
 * The symbol name additionally carries the bit depth:
 * ff_h264_[bi]weight_<W>_<DEPTH>_<OPT>.
 */

/* Declares ff_h264_weight_<W>_<DEPTH>_<OPT>. */
#define H264_WEIGHT_10(W, DEPTH, OPT)                                   \
void ff_h264_weight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst,       \
                                                    ptrdiff_t stride,   \
                                                    int height,         \
                                                    int log2_denom,     \
                                                    int weight,         \
                                                    int offset);

/* Declares ff_h264_biweight_<W>_<DEPTH>_<OPT>. */
#define H264_BIWEIGHT_10(W, DEPTH, OPT)                                 \
void ff_h264_biweight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst,     \
                                                      uint8_t *src,     \
                                                      ptrdiff_t stride, \
                                                      int height,       \
                                                      int log2_denom,   \
                                                      int weightd,      \
                                                      int weights,      \
                                                      int offset);

/* SSE2 and SSE4 weight + biweight prototypes for one width and depth. */
#define H264_BIWEIGHT_10_SSE(W, DEPTH)                                  \
    H264_WEIGHT_10(W, DEPTH, sse2)                                      \
    H264_WEIGHT_10(W, DEPTH, sse4)                                      \
    H264_BIWEIGHT_10(W, DEPTH, sse2)                                    \
    H264_BIWEIGHT_10(W, DEPTH, sse4)

/*
 * ff_h264dsp_init_x86() installs the 16-, 8- and 4-wide 10-bit routines, so
 * all three widths need prototypes.
 */
H264_BIWEIGHT_10_SSE(16, 10)
H264_BIWEIGHT_10_SSE(8,  10)
H264_BIWEIGHT_10_SSE(4,  10)
227 
229  const int chroma_format_idc)
230 {
231 #if HAVE_X86ASM
232  int cpu_flags = av_get_cpu_flags();
233 
234  if (EXTERNAL_MMXEXT(cpu_flags) && chroma_format_idc <= 1)
235  c->h264_loop_filter_strength = ff_h264_loop_filter_strength_mmxext;
236 
237  if (bit_depth == 8) {
238  if (EXTERNAL_MMX(cpu_flags)) {
239  c->h264_idct_dc_add =
240  c->h264_idct_add = ff_h264_idct_add_8_mmx;
241  c->h264_idct8_dc_add =
242  c->h264_idct8_add = ff_h264_idct8_add_8_mmx;
243 
244  c->h264_idct_add16 = ff_h264_idct_add16_8_mmx;
245  c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx;
246  if (chroma_format_idc <= 1) {
247  c->h264_idct_add8 = ff_h264_idct_add8_8_mmx;
248  } else {
249  c->h264_idct_add8 = ff_h264_idct_add8_422_8_mmx;
250  }
251  c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx;
253  c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx;
254  }
255  if (EXTERNAL_MMXEXT(cpu_flags)) {
256  c->h264_idct_dc_add = ff_h264_idct_dc_add_8_mmxext;
257  c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmxext;
258  c->h264_idct_add16 = ff_h264_idct_add16_8_mmxext;
259  c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmxext;
260  if (chroma_format_idc <= 1)
261  c->h264_idct_add8 = ff_h264_idct_add8_8_mmxext;
262  c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmxext;
263 
264  c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_8_mmxext;
265  c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_mmxext;
266  if (chroma_format_idc <= 1) {
267  c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_8_mmxext;
268  c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_mmxext;
269  } else {
270  c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_8_mmxext;
271  c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma422_intra_8_mmxext;
272  }
273 #if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
274  c->h264_v_loop_filter_luma = deblock_v_luma_8_mmxext;
275  c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_mmxext;
276  c->h264_v_loop_filter_luma_intra = deblock_v_luma_intra_8_mmxext;
277  c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmxext;
278 #endif /* ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL */
279  c->weight_h264_pixels_tab[0] = ff_h264_weight_16_mmxext;
280  c->weight_h264_pixels_tab[1] = ff_h264_weight_8_mmxext;
281  c->weight_h264_pixels_tab[2] = ff_h264_weight_4_mmxext;
282 
283  c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_mmxext;
284  c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_mmxext;
285  c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_mmxext;
286  }
287  if (EXTERNAL_SSE2(cpu_flags)) {
288  c->h264_idct8_add = ff_h264_idct8_add_8_sse2;
289 
290  c->h264_idct_add16 = ff_h264_idct_add16_8_sse2;
291  c->h264_idct8_add4 = ff_h264_idct8_add4_8_sse2;
292  if (chroma_format_idc <= 1)
293  c->h264_idct_add8 = ff_h264_idct_add8_8_sse2;
294  c->h264_idct_add16intra = ff_h264_idct_add16intra_8_sse2;
295  c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_sse2;
296 
297  c->weight_h264_pixels_tab[0] = ff_h264_weight_16_sse2;
298  c->weight_h264_pixels_tab[1] = ff_h264_weight_8_sse2;
299 
300  c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_sse2;
301  c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_sse2;
302 
303  c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_sse2;
304  c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_sse2;
305  c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2;
306  c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
307 
308 #if ARCH_X86_64
309  c->h264_h_loop_filter_luma_mbaff = ff_deblock_h_luma_mbaff_8_sse2;
310 #endif
311 
312  c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_8_sse2;
313  c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_sse2;
314  if (chroma_format_idc <= 1) {
315  c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_8_sse2;
316  c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_sse2;
317  } else {
318  c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_8_sse2;
319  c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma422_intra_8_sse2;
320  }
321 
322  c->h264_idct_add = ff_h264_idct_add_8_sse2;
323  c->h264_idct_dc_add = ff_h264_idct_dc_add_8_sse2;
324  }
325  if (EXTERNAL_SSSE3(cpu_flags)) {
326  c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3;
327  c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_ssse3;
328  }
329  if (EXTERNAL_AVX(cpu_flags)) {
330  c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_avx;
331  c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_avx;
332  c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx;
333  c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx;
334 #if ARCH_X86_64
335  c->h264_h_loop_filter_luma_mbaff = ff_deblock_h_luma_mbaff_8_avx;
336 #endif
337 
338  c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_8_avx;
339  c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_avx;
340  if (chroma_format_idc <= 1) {
341  c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_8_avx;
342  c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_avx;
343  } else {
344  c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_8_avx;
345  c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma422_intra_8_avx;
346  }
347 
348  c->h264_idct_add = ff_h264_idct_add_8_avx;
349  c->h264_idct_dc_add = ff_h264_idct_dc_add_8_avx;
350  }
351  } else if (bit_depth == 10) {
352  if (EXTERNAL_MMXEXT(cpu_flags)) {
353 #if ARCH_X86_32
354  c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_mmxext;
355  c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmxext;
356  if (chroma_format_idc <= 1) {
357  c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_10_mmxext;
358  } else {
359  c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_10_mmxext;
360  }
361  c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_mmxext;
362  c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_mmxext;
363  c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_mmxext;
364  c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_mmxext;
365 #endif /* ARCH_X86_32 */
366  c->h264_idct_dc_add = ff_h264_idct_dc_add_10_mmxext;
367  }
368  if (EXTERNAL_SSE2(cpu_flags)) {
369  c->h264_idct_add = ff_h264_idct_add_10_sse2;
370  c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2;
371 
372  c->h264_idct_add16 = ff_h264_idct_add16_10_sse2;
373  if (chroma_format_idc <= 1) {
374  c->h264_idct_add8 = ff_h264_idct_add8_10_sse2;
375  } else {
376  c->h264_idct_add8 = ff_h264_idct_add8_422_10_sse2;
377  }
378  c->h264_idct_add16intra = ff_h264_idct_add16intra_10_sse2;
379 #if HAVE_ALIGNED_STACK
380  c->h264_idct8_add = ff_h264_idct8_add_10_sse2;
381  c->h264_idct8_add4 = ff_h264_idct8_add4_10_sse2;
382 #endif /* HAVE_ALIGNED_STACK */
383 
384  c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse2;
385  c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse2;
386  c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse2;
387 
388  c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse2;
389  c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse2;
390  c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse2;
391 
392  c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_sse2;
393  c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_sse2;
394  if (chroma_format_idc <= 1) {
395  c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_10_sse2;
396  } else {
397  c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_10_sse2;
398  }
399 #if HAVE_ALIGNED_STACK
400  c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_sse2;
401  c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_sse2;
402  c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_sse2;
403  c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2;
404 #endif /* HAVE_ALIGNED_STACK */
405  }
406  if (EXTERNAL_SSE4(cpu_flags)) {
407  c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4;
408  c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4;
409  c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4;
410 
411  c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse4;
412  c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4;
413  c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4;
414  }
415  if (EXTERNAL_AVX(cpu_flags)) {
416  c->h264_idct_dc_add =
417  c->h264_idct_add = ff_h264_idct_add_10_avx;
418  c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx;
419 
420  c->h264_idct_add16 = ff_h264_idct_add16_10_avx;
421  if (chroma_format_idc <= 1) {
422  c->h264_idct_add8 = ff_h264_idct_add8_10_avx;
423  } else {
424  c->h264_idct_add8 = ff_h264_idct_add8_422_10_avx;
425  }
426  c->h264_idct_add16intra = ff_h264_idct_add16intra_10_avx;
427 #if HAVE_ALIGNED_STACK
428  c->h264_idct8_add = ff_h264_idct8_add_10_avx;
429  c->h264_idct8_add4 = ff_h264_idct8_add4_10_avx;
430 #endif /* HAVE_ALIGNED_STACK */
431 
432  c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_avx;
433  c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_avx;
434  if (chroma_format_idc <= 1) {
435  c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_10_avx;
436  } else {
437  c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_10_avx;
438  }
439 #if HAVE_ALIGNED_STACK
440  c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_avx;
441  c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_avx;
442  c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_avx;
443  c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_avx;
444 #endif /* HAVE_ALIGNED_STACK */
445  }
446  }
447 #endif
448 }
stride
int stride
Definition: mace.c:144
bit_depth
static void bit_depth(AudioStatsContext *s, uint64_t mask, uint64_t imask, AVRational *depth)
Definition: af_astats.c:226
cpu.h
mv
static const int8_t mv[256][2]
Definition: 4xm.c:77
output
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
Definition: filter_design.txt:225
LF_FUNC
#define LF_FUNC(DIR, TYPE, DEPTH, OPT)
Definition: h264dsp_init.c:102
ff_h264_luma_dc_dequant_idct_sse2
void ff_h264_luma_dc_dequant_idct_sse2(int16_t *output, int16_t *input, int qmul)
step
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about which is also called distortion Distortion can be quantified by almost any quality measurement one chooses the sum of squared differences is used but more complex methods that consider psychovisual effects can be used as well It makes no difference in this discussion First step
Definition: rate_distortion.txt:58
IDCT_ADD_REP_FUNC
#define IDCT_ADD_REP_FUNC(NUM, REP, DEPTH, OPT)
Definition: h264dsp_init.c:52
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:93
LF_IFUNC
#define LF_IFUNC(DIR, TYPE, DEPTH, OPT)
Definition: h264dsp_init.c:108
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:50
IDCT_ADD_FUNC
#define IDCT_ADD_FUNC(NUM, DEPTH, OPT)
Definition: h264dsp_init.c:29
ff_h264_luma_dc_dequant_idct_mmx
void ff_h264_luma_dc_dequant_idct_mmx(int16_t *output, int16_t *input, int qmul)
av_cold
#define av_cold
Definition: attributes.h:84
field
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this field
Definition: writing_filters.txt:78
h264dsp.h
H264_BIWEIGHT_10_SSE
#define H264_BIWEIGHT_10_SSE(W, DEPTH)
Definition: h264dsp_init.c:218
AV_CPU_FLAG_CMOV
#define AV_CPU_FLAG_CMOV
supports cmov instruction
Definition: cpu.h:53
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
H264DSPContext
Context for storing H.264 DSP functions.
Definition: h264dsp.h:42
cpu.h
H264_BIWEIGHT_MMX
#define H264_BIWEIGHT_MMX(W)
Definition: h264dsp_init.c:186
asm.h
attributes.h
EXTERNAL_SSE2
#define EXTERNAL_SSE2(flags)
Definition: cpu.h:59
input
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
Definition: filter_design.txt:172
IDCT_ADD_REP_FUNC2
#define IDCT_ADD_REP_FUNC2(NUM, REP, DEPTH, OPT)
Definition: h264dsp_init.c:74
ff_h264_loop_filter_strength_mmxext
void ff_h264_loop_filter_strength_mmxext(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2], int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field)
uint8_t
uint8_t
Definition: audio_convert.c:194
ff_h264dsp_init_x86
av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
Definition: h264dsp_init.c:228
EXTERNAL_AVX
#define EXTERNAL_AVX(flags)
Definition: cpu.h:70
LF_FUNCS
#define LF_FUNCS(type, depth)
Definition: h264dsp_init.c:114
EXTERNAL_SSE4
#define EXTERNAL_SSE4(flags)
Definition: cpu.h:68
H264_BIWEIGHT_MMX_SSE
#define H264_BIWEIGHT_MMX_SSE(W)
Definition: h264dsp_init.c:190
ref
static int ref[MAX_W *MAX_W]
Definition: jpeg2000dwt.c:107
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
h
h
Definition: vp9dsp_template.c:2038
EXTERNAL_SSSE3
#define EXTERNAL_SSSE3(flags)
Definition: cpu.h:65
EXTERNAL_MMX
#define EXTERNAL_MMX(flags)
Definition: cpu.h:56
EXTERNAL_MMXEXT
#define EXTERNAL_MMXEXT(flags)
Definition: cpu.h:57