FFmpeg
hevcdsp_init.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2013 Seppo Tomperi
3  * Copyright (c) 2013 - 2014 Pierre-Edouard Lepere
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include "config.h"
23 
24 #include "libavutil/cpu.h"
25 #include "libavutil/x86/asm.h"
26 #include "libavutil/x86/cpu.h"
27 #include "libavcodec/get_bits.h" /* required for hevcdsp.h GetBitContext */
28 #include "libavcodec/hevcdsp.h"
29 #include "libavcodec/x86/hevcdsp.h"
30 
/*
 * Declare the prototype of a single chroma loop-filter routine named
 * ff_hevc_<DIR>_loop_filter_chroma_<DEPTH>_<OPT>().
 * The expansion already ends in a semicolon, so invocations carry none.
 */
#define LFC_FUNC(DIR, DEPTH, OPT) \
void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT( \
    uint8_t *pix, ptrdiff_t stride, int *tc, \
    uint8_t *no_p, uint8_t *no_q);
33 
/*
 * Declare the prototype of a single luma loop-filter routine named
 * ff_hevc_<DIR>_loop_filter_luma_<DEPTH>_<OPT>(). Compared with the chroma
 * prototype it takes one extra argument, `beta`.
 * The expansion already ends in a semicolon, so invocations carry none.
 */
#define LFL_FUNC(DIR, DEPTH, OPT) \
void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT( \
    uint8_t *pix, ptrdiff_t stride, int beta, int *tc, \
    uint8_t *no_p, uint8_t *no_q);
36 
/*
 * Declare the `h` and the `v` chroma loop-filter prototypes for one
 * depth/opt pair. The `type` argument is accepted but never used by the
 * expansion.
 */
#define LFC_FUNCS(type, depth, opt) \
    LFC_FUNC(h, depth, opt) \
    LFC_FUNC(v, depth, opt)
40 
/*
 * Declare the `h` and the `v` luma loop-filter prototypes for one
 * depth/opt pair. The `type` argument is accepted but never used by the
 * expansion.
 */
#define LFL_FUNCS(type, depth, opt) \
    LFL_FUNC(h, depth, opt) \
    LFL_FUNC(v, depth, opt)
44 
/*
 * Prototypes of the loop-filter routines (h and v variant each):
 *   chroma: depths 8/10/12 for sse2 and avx
 *   luma:   depths 8/10/12 for sse2, ssse3 and avx
 * Only declarations are produced here; the definitions live elsewhere.
 */
45 LFC_FUNCS(uint8_t, 8, sse2)
46 LFC_FUNCS(uint8_t, 10, sse2)
47 LFC_FUNCS(uint8_t, 12, sse2)
48 LFC_FUNCS(uint8_t, 8, avx)
49 LFC_FUNCS(uint8_t, 10, avx)
50 LFC_FUNCS(uint8_t, 12, avx)
51 LFL_FUNCS(uint8_t, 8, sse2)
52 LFL_FUNCS(uint8_t, 10, sse2)
53 LFL_FUNCS(uint8_t, 12, sse2)
54 LFL_FUNCS(uint8_t, 8, ssse3)
55 LFL_FUNCS(uint8_t, 10, ssse3)
56 LFL_FUNCS(uint8_t, 12, ssse3)
57 LFL_FUNCS(uint8_t, 8, avx)
58 LFL_FUNCS(uint8_t, 10, avx)
59 LFL_FUNCS(uint8_t, 12, avx)
60 
/*
 * Declare the three DC-only IDCT prototypes (depth suffixes 8, 10 and 12)
 * for block-size token W and instruction-set suffix opt:
 *   ff_hevc_idct_<W>_dc_{8,10,12}_<opt>(int16_t *coeffs)
 * The last declaration has no semicolon; the invocation supplies it.
 */
#define IDCT_DC_FUNCS(W, opt) \
void ff_hevc_idct_ ## W ## _dc_8_ ## opt(int16_t *coeffs); \
void ff_hevc_idct_ ## W ## _dc_10_ ## opt(int16_t *coeffs); \
void ff_hevc_idct_ ## W ## _dc_12_ ## opt(int16_t *coeffs)
65 
/*
 * DC-only IDCT prototypes: 4x4 and 8x8 for mmxext, 8x8/16x16/32x32 for
 * sse2, 16x16/32x32 for avx2. Each line declares the 8-, 10- and 12-bit
 * variant; the trailing ';' completes the last declaration of the macro.
 */
66 IDCT_DC_FUNCS(4x4, mmxext);
67 IDCT_DC_FUNCS(8x8, mmxext);
68 IDCT_DC_FUNCS(8x8, sse2);
69 IDCT_DC_FUNCS(16x16, sse2);
70 IDCT_DC_FUNCS(32x32, sse2);
71 IDCT_DC_FUNCS(16x16, avx2);
72 IDCT_DC_FUNCS(32x32, avx2);
73 
/*
 * Declare the full IDCT prototypes for one instruction-set suffix:
 * block sizes 4x4, 8x8, 16x16 and 32x32, each with depth suffix 8 and 10.
 * Every prototype takes the coefficient buffer and a col_limit value.
 * The expansion is self-terminated, so invocations carry no semicolon.
 */
#define IDCT_FUNCS(opt) \
void ff_hevc_idct_4x4_8_ ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_4x4_10_ ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_8x8_8_ ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_8x8_10_ ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_16x16_8_ ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_16x16_10_ ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_32x32_8_ ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_32x32_10_ ## opt(int16_t *coeffs, int col_limit);
83 
/* Full IDCT prototypes (4x4 .. 32x32, depth suffixes 8 and 10) for sse2 and avx. */
84 IDCT_FUNCS(sse2)
85 IDCT_FUNCS(avx)
86 
/*
 * Define ff_hevc_put_hevc_<name><W>_<bitd>_<opt>() as a wrapper that calls
 * the <step>-wide routine ff_hevc_put_hevc_<name><step>_<bitd>_<opt>() once
 * per column offset 0, step, 2*step, ... below W.
 *
 * For column offset `col` the int16_t destination is advanced by `col`
 * elements and the uint8_t source by col * ((bitd + 7) / 8) elements; all
 * remaining arguments are forwarded unchanged.
 */
#define mc_rep_func(name, bitd, step, W, opt) \
void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *_dst, \
        uint8_t *_src, ptrdiff_t _srcstride, int height, \
        intptr_t mx, intptr_t my, int width) \
{ \
    int col = 0; \
    while (col < W) { \
        ff_hevc_put_hevc_##name##step##_##bitd##_##opt(_dst + col, \
                _src + (col * ((bitd + 7) / 8)), \
                _srcstride, height, mx, my, width); \
        col += step; \
    } \
}
/*
 * Define ff_hevc_put_hevc_uni_<name><W>_<bitd>_<opt>() as a wrapper that
 * calls the <step>-wide "uni" routine once per column offset 0, step, ...
 * below W.
 *
 * Both dst and src are uint8_t pointers and are advanced by the same
 * amount, col * ((bitd + 7) / 8); strides and the remaining arguments are
 * forwarded unchanged.
 */
#define mc_rep_uni_func(name, bitd, step, W, opt) \
void ff_hevc_put_hevc_uni_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, \
        uint8_t *_src, ptrdiff_t _srcstride, int height, \
        intptr_t mx, intptr_t my, int width) \
{ \
    int col = 0; \
    while (col < W) { \
        const int byte_off = col * ((bitd + 7) / 8); \
        ff_hevc_put_hevc_uni_##name##step##_##bitd##_##opt(_dst + byte_off, dststride, \
                _src + byte_off, _srcstride, \
                height, mx, my, width); \
        col += step; \
    } \
}
/*
 * Define ff_hevc_put_hevc_bi_<name><W>_<bitd>_<opt>() as a wrapper that
 * calls the <step>-wide "bi" routine once per column offset 0, step, ...
 * below W.
 *
 * The uint8_t dst and src pointers are advanced by col * ((bitd + 7) / 8),
 * the int16_t second source by `col` elements; everything else is
 * forwarded unchanged.
 */
#define mc_rep_bi_func(name, bitd, step, W, opt) \
void ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, uint8_t *_src, \
        ptrdiff_t _srcstride, int16_t* _src2, \
        int height, intptr_t mx, intptr_t my, int width) \
{ \
    int col = 0; \
    while (col < W) { \
        const int byte_off = col * ((bitd + 7) / 8); \
        ff_hevc_put_hevc_bi_##name##step##_##bitd##_##opt(_dst + byte_off, dststride, \
                _src + byte_off, _srcstride, _src2 + col, \
                height, mx, my, width); \
        col += step; \
    } \
}
133 
/*
 * Emit all three repeated-kernel wrappers (plain, uni and bi) for one
 * name / bit depth / step / width / instruction-set combination.
 */
#define mc_rep_funcs(name, bitd, step, W, opt) \
    mc_rep_func(name, bitd, step, W, opt) \
    mc_rep_uni_func(name, bitd, step, W, opt) \
    mc_rep_bi_func(name, bitd, step, W, opt)
138 
/*
 * Define ff_hevc_put_hevc_<name><W>_<bitd>_<opt>() from two differently
 * sized routines: the <step1>-wide one handles the start of the block, the
 * <step2>-wide one handles what follows.
 *
 * For the second call the int16_t destination is advanced by step1
 * elements and the uint8_t source by step1 * ((bitd + 7) / 8) elements.
 */
#define mc_rep_func2(name, bitd, step1, step2, W, opt) \
void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *dst, \
        uint8_t *src, ptrdiff_t _srcstride, int height, \
        intptr_t mx, intptr_t my, int width) \
{ \
    const int src_off = step1 * ((bitd + 7) / 8); \
    ff_hevc_put_hevc_##name##step1##_##bitd##_##opt(dst, src, _srcstride, \
            height, mx, my, width); \
    ff_hevc_put_hevc_##name##step2##_##bitd##_##opt(dst + step1, src + src_off, _srcstride, \
            height, mx, my, width); \
}
/*
 * Define ff_hevc_put_hevc_uni_<name><W>_<bitd>_<opt>() from two "uni"
 * routines: the <step1>-wide one first, then the <step2>-wide one.
 *
 * For the second call both uint8_t pointers (dst and src) are advanced by
 * step1 * ((bitd + 7) / 8) elements.
 */
#define mc_rep_uni_func2(name, bitd, step1, step2, W, opt) \
void ff_hevc_put_hevc_uni_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, \
        uint8_t *src, ptrdiff_t _srcstride, int height, \
        intptr_t mx, intptr_t my, int width) \
{ \
    const int byte_off = step1 * ((bitd + 7) / 8); \
    ff_hevc_put_hevc_uni_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, \
            height, mx, my, width); \
    ff_hevc_put_hevc_uni_##name##step2##_##bitd##_##opt(dst + byte_off, dststride, \
            src + byte_off, _srcstride, \
            height, mx, my, width); \
}
/*
 * Define ff_hevc_put_hevc_bi_<name><W>_<bitd>_<opt>() from two "bi"
 * routines: the <step1>-wide one first, then the <step2>-wide one.
 *
 * For the second call dst and src are advanced by
 * step1 * ((bitd + 7) / 8) elements and the int16_t src2 by step1 elements.
 */
#define mc_rep_bi_func2(name, bitd, step1, step2, W, opt) \
void ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \
        ptrdiff_t _srcstride, int16_t* src2, \
        int height, intptr_t mx, intptr_t my, int width) \
{ \
    const int byte_off = step1 * ((bitd + 7) / 8); \
    ff_hevc_put_hevc_bi_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, \
            height, mx, my, width); \
    ff_hevc_put_hevc_bi_##name##step2##_##bitd##_##opt(dst + byte_off, dststride, \
            src + byte_off, _srcstride, src2 + step1, \
            height, mx, my, width); \
}
168 
/*
 * Emit all three two-part wrappers (plain, uni and bi) for one
 * name / bit depth / step1 / step2 / width / instruction-set combination.
 */
#define mc_rep_funcs2(name, bitd, step1, step2, W, opt) \
    mc_rep_func2(name, bitd, step1, step2, W, opt) \
    mc_rep_uni_func2(name, bitd, step1, step2, W, opt) \
    mc_rep_bi_func2(name, bitd, step1, step2, W, opt)
173 
174 #if ARCH_X86_64 && HAVE_SSE4_EXTERNAL
175 
/*
 * Define the 10-bit routine ff_hevc_put_hevc_<name><width1>_10_<opt1>() by
 * combining two routines of different instruction sets: the <width2>-wide
 * <opt1> routine runs first, then the <width3>-wide <opt2> routine.
 *
 * For the second call the int16_t destination is advanced by width2
 * elements and the uint8_t source by width4 elements; width4 is supplied
 * by the caller of the macro rather than derived from width2.
 */
#define mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
void ff_hevc_put_hevc_##name##width1##_10_##opt1(int16_t *dst, uint8_t *src, ptrdiff_t _srcstride, \
        int height, intptr_t mx, intptr_t my, int width) \
{ \
    int16_t *dst_tail = dst + width2; \
    uint8_t *src_tail = src + width4; \
    ff_hevc_put_hevc_##name##width2##_10_##opt1(dst, src, _srcstride, \
            height, mx, my, width); \
    ff_hevc_put_hevc_##name##width3##_10_##opt2(dst_tail, src_tail, _srcstride, \
            height, mx, my, width); \
}
184 
/*
 * Define the 10-bit "bi" routine
 * ff_hevc_put_hevc_bi_<name><width1>_10_<opt1>() from the <width2>-wide
 * <opt1> routine followed by the <width3>-wide <opt2> routine.
 *
 * For the second call the uint8_t dst and src pointers are advanced by
 * width4 elements and the int16_t src2 by width2 elements.
 */
#define mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
void ff_hevc_put_hevc_bi_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \
        ptrdiff_t _srcstride, int16_t *src2, \
        int height, intptr_t mx, intptr_t my, int width) \
{ \
    uint8_t *dst_tail = dst + width4; \
    uint8_t *src_tail = src + width4; \
    int16_t *src2_tail = src2 + width2; \
    ff_hevc_put_hevc_bi_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, src2, \
            height, mx, my, width); \
    ff_hevc_put_hevc_bi_##name##width3##_10_##opt2(dst_tail, dststride, src_tail, _srcstride, src2_tail, \
            height, mx, my, width); \
}
195 
/*
 * Define the 10-bit "uni" routine
 * ff_hevc_put_hevc_uni_<name><width1>_10_<opt1>() from the <width2>-wide
 * <opt1> routine followed by the <width3>-wide <opt2> routine.
 *
 * For the second call both uint8_t pointers (dst and src) are advanced by
 * width4 elements.
 */
#define mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
void ff_hevc_put_hevc_uni_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, \
        uint8_t *src, ptrdiff_t _srcstride, int height, \
        intptr_t mx, intptr_t my, int width) \
{ \
    uint8_t *dst_tail = dst + width4; \
    uint8_t *src_tail = src + width4; \
    ff_hevc_put_hevc_uni_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, \
            height, mx, my, width); \
    ff_hevc_put_hevc_uni_##name##width3##_10_##opt2(dst_tail, dststride, src_tail, _srcstride, \
            height, mx, my, width); \
}
206 
/*
 * Emit the plain, bi and uni 10-bit mixed-instruction-set wrappers for one
 * parameter set.
 */
#define mc_rep_mixs_10(name, width1, width2, width3, opt1, opt2, width4) \
    mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
    mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
    mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)
211 
/*
 * Define the 8-bit routine ff_hevc_put_hevc_<name><width1>_8_<opt1>() by
 * combining the <width2>-wide <opt1> routine with the <width3>-wide <opt2>
 * routine.
 *
 * For the second call both the int16_t destination and the uint8_t source
 * are advanced by width2 elements.
 */
#define mc_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
void ff_hevc_put_hevc_##name##width1##_8_##opt1(int16_t *dst, uint8_t *src, ptrdiff_t _srcstride, \
        int height, intptr_t mx, intptr_t my, int width) \
{ \
    int16_t *dst_tail = dst + width2; \
    uint8_t *src_tail = src + width2; \
    ff_hevc_put_hevc_##name##width2##_8_##opt1(dst, src, _srcstride, \
            height, mx, my, width); \
    ff_hevc_put_hevc_##name##width3##_8_##opt2(dst_tail, src_tail, _srcstride, \
            height, mx, my, width); \
}
220 
/*
 * Define the 8-bit "bi" routine
 * ff_hevc_put_hevc_bi_<name><width1>_8_<opt1>() from the <width2>-wide
 * <opt1> routine followed by the <width3>-wide <opt2> routine.
 *
 * For the second call dst, src and src2 are each advanced by width2
 * elements of their respective type.
 */
#define mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
void ff_hevc_put_hevc_bi_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \
        ptrdiff_t _srcstride, int16_t* src2, \
        int height, intptr_t mx, intptr_t my, int width) \
{ \
    uint8_t *dst_tail = dst + width2; \
    uint8_t *src_tail = src + width2; \
    int16_t *src2_tail = src2 + width2; \
    ff_hevc_put_hevc_bi_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \
            src2, height, mx, my, width); \
    ff_hevc_put_hevc_bi_##name##width3##_8_##opt2(dst_tail, dststride, src_tail, _srcstride, \
            src2_tail, height, mx, my, width); \
}
231 
/*
 * Define the 8-bit "uni" routine
 * ff_hevc_put_hevc_uni_<name><width1>_8_<opt1>() from the <width2>-wide
 * <opt1> routine followed by the <width3>-wide <opt2> routine.
 *
 * For the second call dst and src are both advanced by width2 elements.
 */
#define mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
void ff_hevc_put_hevc_uni_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, \
        uint8_t *src, ptrdiff_t _srcstride, int height, \
        intptr_t mx, intptr_t my, int width) \
{ \
    uint8_t *dst_tail = dst + width2; \
    uint8_t *src_tail = src + width2; \
    ff_hevc_put_hevc_uni_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \
            height, mx, my, width); \
    ff_hevc_put_hevc_uni_##name##width3##_8_##opt2(dst_tail, dststride, src_tail, _srcstride, \
            height, mx, my, width); \
}
242 
/*
 * Emit the plain, bi and uni 8-bit mixed-instruction-set wrappers for one
 * parameter set.
 */
#define mc_rep_mixs_8(name, width1, width2, width3, opt1, opt2) \
    mc_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
    mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
    mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2)
247 
/*
 * Wrapper functions carrying the avx2 suffix, compiled only when
 * HAVE_AVX2_EXTERNAL is set.
 *  - mc_rep_mixs_8 / mc_rep_mix*_10 lines: 8-bit width 48 = avx2 32 + sse4 16,
 *    10-bit width 24 = avx2 16 + sse4 8 (with 32 passed as the uint8_t
 *    pointer offset of the second part).
 *  - mc_rep_func* lines: wide blocks tiled from a single narrower avx2
 *    routine (third numeric argument = step, fourth = resulting width).
 * Note that for 10-bit pel_pixels only the plain and bi mixed wrappers are
 * instantiated, not the uni one.
 */
248 #if HAVE_AVX2_EXTERNAL
249 
250 mc_rep_mixs_8(pel_pixels, 48, 32, 16, avx2, sse4)
251 mc_rep_mixs_8(epel_hv, 48, 32, 16, avx2, sse4)
252 mc_rep_mixs_8(epel_h , 48, 32, 16, avx2, sse4)
253 mc_rep_mixs_8(epel_v , 48, 32, 16, avx2, sse4)
254 
255 mc_rep_mix_10(pel_pixels, 24, 16, 8, avx2, sse4, 32)
256 mc_bi_rep_mix_10(pel_pixels,24, 16, 8, avx2, sse4, 32)
257 mc_rep_mixs_10(epel_hv, 24, 16, 8, avx2, sse4, 32)
258 mc_rep_mixs_10(epel_h , 24, 16, 8, avx2, sse4, 32)
259 mc_rep_mixs_10(epel_v , 24, 16, 8, avx2, sse4, 32)
260 
261 
262 mc_rep_mixs_10(qpel_h , 24, 16, 8, avx2, sse4, 32)
263 mc_rep_mixs_10(qpel_v , 24, 16, 8, avx2, sse4, 32)
264 mc_rep_mixs_10(qpel_hv, 24, 16, 8, avx2, sse4, 32)
265 
266 
267 mc_rep_uni_func(pel_pixels, 8, 64, 128, avx2)//used for 10bit
268 mc_rep_uni_func(pel_pixels, 8, 32, 96, avx2) //used for 10bit
269 
270 mc_rep_funcs(pel_pixels, 8, 32, 64, avx2)
271 
272 mc_rep_func(pel_pixels, 10, 16, 32, avx2)
273 mc_rep_func(pel_pixels, 10, 16, 48, avx2)
274 mc_rep_func(pel_pixels, 10, 32, 64, avx2)
275 
276 mc_rep_bi_func(pel_pixels, 10, 16, 32, avx2)
277 mc_rep_bi_func(pel_pixels, 10, 16, 48, avx2)
278 mc_rep_bi_func(pel_pixels, 10, 32, 64, avx2)
279 
280 mc_rep_funcs(epel_h, 8, 32, 64, avx2)
281 
282 mc_rep_funcs(epel_v, 8, 32, 64, avx2)
283 
284 mc_rep_funcs(epel_h, 10, 16, 32, avx2)
285 mc_rep_funcs(epel_h, 10, 16, 48, avx2)
286 mc_rep_funcs(epel_h, 10, 32, 64, avx2)
287 
288 mc_rep_funcs(epel_v, 10, 16, 32, avx2)
289 mc_rep_funcs(epel_v, 10, 16, 48, avx2)
290 mc_rep_funcs(epel_v, 10, 32, 64, avx2)
291 
292 
293 mc_rep_funcs(epel_hv, 8, 32, 64, avx2)
294 
295 mc_rep_funcs(epel_hv, 10, 16, 32, avx2)
296 mc_rep_funcs(epel_hv, 10, 16, 48, avx2)
297 mc_rep_funcs(epel_hv, 10, 32, 64, avx2)
298 
299 mc_rep_funcs(qpel_h, 8, 32, 64, avx2)
300 mc_rep_mixs_8(qpel_h , 48, 32, 16, avx2, sse4)
301 
302 mc_rep_funcs(qpel_v, 8, 32, 64, avx2)
303 mc_rep_mixs_8(qpel_v, 48, 32, 16, avx2, sse4)
304 
305 mc_rep_funcs(qpel_h, 10, 16, 32, avx2)
306 mc_rep_funcs(qpel_h, 10, 16, 48, avx2)
307 mc_rep_funcs(qpel_h, 10, 32, 64, avx2)
308 
309 mc_rep_funcs(qpel_v, 10, 16, 32, avx2)
310 mc_rep_funcs(qpel_v, 10, 16, 48, avx2)
311 mc_rep_funcs(qpel_v, 10, 32, 64, avx2)
312 
313 mc_rep_funcs(qpel_hv, 10, 16, 32, avx2)
314 mc_rep_funcs(qpel_hv, 10, 16, 48, avx2)
315 mc_rep_funcs(qpel_hv, 10, 32, 64, avx2)
316 
317 #endif //AVX2
318 
/*
 * sse4 wrapper functions. Each mc_rep_funcs(name, bitd, step, W, sse4) line
 * defines the plain, uni and bi routine of width W by repeating the
 * step-wide sse4 routine of the same name and bit depth.
 * The two mc_rep_funcs2 lines (8-bit epel_hv and qpel_hv, width 12) build
 * the block from an 8-wide part followed by a 4-wide part instead.
 */
319 mc_rep_funcs(pel_pixels, 8, 16, 64, sse4)
320 mc_rep_funcs(pel_pixels, 8, 16, 48, sse4)
321 mc_rep_funcs(pel_pixels, 8, 16, 32, sse4)
322 mc_rep_funcs(pel_pixels, 8, 8, 24, sse4)
323 mc_rep_funcs(pel_pixels,10, 8, 64, sse4)
324 mc_rep_funcs(pel_pixels,10, 8, 48, sse4)
325 mc_rep_funcs(pel_pixels,10, 8, 32, sse4)
326 mc_rep_funcs(pel_pixels,10, 8, 24, sse4)
327 mc_rep_funcs(pel_pixels,10, 8, 16, sse4)
328 mc_rep_funcs(pel_pixels,10, 4, 12, sse4)
329 mc_rep_funcs(pel_pixels,12, 8, 64, sse4)
330 mc_rep_funcs(pel_pixels,12, 8, 48, sse4)
331 mc_rep_funcs(pel_pixels,12, 8, 32, sse4)
332 mc_rep_funcs(pel_pixels,12, 8, 24, sse4)
333 mc_rep_funcs(pel_pixels,12, 8, 16, sse4)
334 mc_rep_funcs(pel_pixels,12, 4, 12, sse4)
335 
336 mc_rep_funcs(epel_h, 8, 16, 64, sse4)
337 mc_rep_funcs(epel_h, 8, 16, 48, sse4)
338 mc_rep_funcs(epel_h, 8, 16, 32, sse4)
339 mc_rep_funcs(epel_h, 8, 8, 24, sse4)
340 mc_rep_funcs(epel_h,10, 8, 64, sse4)
341 mc_rep_funcs(epel_h,10, 8, 48, sse4)
342 mc_rep_funcs(epel_h,10, 8, 32, sse4)
343 mc_rep_funcs(epel_h,10, 8, 24, sse4)
344 mc_rep_funcs(epel_h,10, 8, 16, sse4)
345 mc_rep_funcs(epel_h,10, 4, 12, sse4)
346 mc_rep_funcs(epel_h,12, 8, 64, sse4)
347 mc_rep_funcs(epel_h,12, 8, 48, sse4)
348 mc_rep_funcs(epel_h,12, 8, 32, sse4)
349 mc_rep_funcs(epel_h,12, 8, 24, sse4)
350 mc_rep_funcs(epel_h,12, 8, 16, sse4)
351 mc_rep_funcs(epel_h,12, 4, 12, sse4)
352 mc_rep_funcs(epel_v, 8, 16, 64, sse4)
353 mc_rep_funcs(epel_v, 8, 16, 48, sse4)
354 mc_rep_funcs(epel_v, 8, 16, 32, sse4)
355 mc_rep_funcs(epel_v, 8, 8, 24, sse4)
356 mc_rep_funcs(epel_v,10, 8, 64, sse4)
357 mc_rep_funcs(epel_v,10, 8, 48, sse4)
358 mc_rep_funcs(epel_v,10, 8, 32, sse4)
359 mc_rep_funcs(epel_v,10, 8, 24, sse4)
360 mc_rep_funcs(epel_v,10, 8, 16, sse4)
361 mc_rep_funcs(epel_v,10, 4, 12, sse4)
362 mc_rep_funcs(epel_v,12, 8, 64, sse4)
363 mc_rep_funcs(epel_v,12, 8, 48, sse4)
364 mc_rep_funcs(epel_v,12, 8, 32, sse4)
365 mc_rep_funcs(epel_v,12, 8, 24, sse4)
366 mc_rep_funcs(epel_v,12, 8, 16, sse4)
367 mc_rep_funcs(epel_v,12, 4, 12, sse4)
368 mc_rep_funcs(epel_hv, 8, 16, 64, sse4)
369 mc_rep_funcs(epel_hv, 8, 16, 48, sse4)
370 mc_rep_funcs(epel_hv, 8, 16, 32, sse4)
371 mc_rep_funcs(epel_hv, 8, 8, 24, sse4)
372 mc_rep_funcs2(epel_hv,8, 8, 4, 12, sse4)
373 mc_rep_funcs(epel_hv,10, 8, 64, sse4)
374 mc_rep_funcs(epel_hv,10, 8, 48, sse4)
375 mc_rep_funcs(epel_hv,10, 8, 32, sse4)
376 mc_rep_funcs(epel_hv,10, 8, 24, sse4)
377 mc_rep_funcs(epel_hv,10, 8, 16, sse4)
378 mc_rep_funcs(epel_hv,10, 4, 12, sse4)
379 mc_rep_funcs(epel_hv,12, 8, 64, sse4)
380 mc_rep_funcs(epel_hv,12, 8, 48, sse4)
381 mc_rep_funcs(epel_hv,12, 8, 32, sse4)
382 mc_rep_funcs(epel_hv,12, 8, 24, sse4)
383 mc_rep_funcs(epel_hv,12, 8, 16, sse4)
384 mc_rep_funcs(epel_hv,12, 4, 12, sse4)
385 
386 mc_rep_funcs(qpel_h, 8, 16, 64, sse4)
387 mc_rep_funcs(qpel_h, 8, 16, 48, sse4)
388 mc_rep_funcs(qpel_h, 8, 16, 32, sse4)
389 mc_rep_funcs(qpel_h, 8, 8, 24, sse4)
390 mc_rep_funcs(qpel_h,10, 8, 64, sse4)
391 mc_rep_funcs(qpel_h,10, 8, 48, sse4)
392 mc_rep_funcs(qpel_h,10, 8, 32, sse4)
393 mc_rep_funcs(qpel_h,10, 8, 24, sse4)
394 mc_rep_funcs(qpel_h,10, 8, 16, sse4)
395 mc_rep_funcs(qpel_h,10, 4, 12, sse4)
396 mc_rep_funcs(qpel_h,12, 8, 64, sse4)
397 mc_rep_funcs(qpel_h,12, 8, 48, sse4)
398 mc_rep_funcs(qpel_h,12, 8, 32, sse4)
399 mc_rep_funcs(qpel_h,12, 8, 24, sse4)
400 mc_rep_funcs(qpel_h,12, 8, 16, sse4)
401 mc_rep_funcs(qpel_h,12, 4, 12, sse4)
402 mc_rep_funcs(qpel_v, 8, 16, 64, sse4)
403 mc_rep_funcs(qpel_v, 8, 16, 48, sse4)
404 mc_rep_funcs(qpel_v, 8, 16, 32, sse4)
405 mc_rep_funcs(qpel_v, 8, 8, 24, sse4)
406 mc_rep_funcs(qpel_v,10, 8, 64, sse4)
407 mc_rep_funcs(qpel_v,10, 8, 48, sse4)
408 mc_rep_funcs(qpel_v,10, 8, 32, sse4)
409 mc_rep_funcs(qpel_v,10, 8, 24, sse4)
410 mc_rep_funcs(qpel_v,10, 8, 16, sse4)
411 mc_rep_funcs(qpel_v,10, 4, 12, sse4)
412 mc_rep_funcs(qpel_v,12, 8, 64, sse4)
413 mc_rep_funcs(qpel_v,12, 8, 48, sse4)
414 mc_rep_funcs(qpel_v,12, 8, 32, sse4)
415 mc_rep_funcs(qpel_v,12, 8, 24, sse4)
416 mc_rep_funcs(qpel_v,12, 8, 16, sse4)
417 mc_rep_funcs(qpel_v,12, 4, 12, sse4)
418 mc_rep_funcs(qpel_hv, 8, 8, 64, sse4)
419 mc_rep_funcs(qpel_hv, 8, 8, 48, sse4)
420 mc_rep_funcs(qpel_hv, 8, 8, 32, sse4)
421 mc_rep_funcs(qpel_hv, 8, 8, 24, sse4)
422 mc_rep_funcs(qpel_hv, 8, 8, 16, sse4)
423 mc_rep_funcs2(qpel_hv,8, 8, 4, 12, sse4)
424 mc_rep_funcs(qpel_hv,10, 8, 64, sse4)
425 mc_rep_funcs(qpel_hv,10, 8, 48, sse4)
426 mc_rep_funcs(qpel_hv,10, 8, 32, sse4)
427 mc_rep_funcs(qpel_hv,10, 8, 24, sse4)
428 mc_rep_funcs(qpel_hv,10, 8, 16, sse4)
429 mc_rep_funcs(qpel_hv,10, 4, 12, sse4)
430 mc_rep_funcs(qpel_hv,12, 8, 64, sse4)
431 mc_rep_funcs(qpel_hv,12, 8, 48, sse4)
432 mc_rep_funcs(qpel_hv,12, 8, 32, sse4)
433 mc_rep_funcs(qpel_hv,12, 8, 24, sse4)
434 mc_rep_funcs(qpel_hv,12, 8, 16, sse4)
435 mc_rep_funcs(qpel_hv,12, 4, 12, sse4)
436 
/*
 * Define ff_hevc_put_hevc_uni_w<W>_<bitd>_<opt>() as a wrapper that calls
 * the <step>-wide routine ff_hevc_put_hevc_uni_w<step>_<bitd>_<opt>() once
 * per column offset 0, step, ... below W.
 *
 * The uint8_t destination is advanced by col * ((bitd + 7) / 8) elements
 * and the int16_t source by `col` elements; height, denom, _wx and _ox are
 * forwarded unchanged.
 */
#define mc_rep_uni_w(bitd, step, W, opt) \
void ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, \
        int height, int denom, int _wx, int _ox) \
{ \
    int col = 0; \
    while (col < W) { \
        ff_hevc_put_hevc_uni_w##step##_##bitd##_##opt(_dst + (col * ((bitd + 7) / 8)), dststride, \
                _src + col, \
                height, denom, _wx, _ox); \
        col += step; \
    } \
}
451 
/*
 * sse4 ff_hevc_put_hevc_uni_w<W> wrappers for bit depths 8, 10 and 12:
 * width 12 is built from the 6-wide routine, widths 16..64 from the
 * 8-wide routine.
 */
452 mc_rep_uni_w(8, 6, 12, sse4)
453 mc_rep_uni_w(8, 8, 16, sse4)
454 mc_rep_uni_w(8, 8, 24, sse4)
455 mc_rep_uni_w(8, 8, 32, sse4)
456 mc_rep_uni_w(8, 8, 48, sse4)
457 mc_rep_uni_w(8, 8, 64, sse4)
458 
459 mc_rep_uni_w(10, 6, 12, sse4)
460 mc_rep_uni_w(10, 8, 16, sse4)
461 mc_rep_uni_w(10, 8, 24, sse4)
462 mc_rep_uni_w(10, 8, 32, sse4)
463 mc_rep_uni_w(10, 8, 48, sse4)
464 mc_rep_uni_w(10, 8, 64, sse4)
465 
466 mc_rep_uni_w(12, 6, 12, sse4)
467 mc_rep_uni_w(12, 8, 16, sse4)
468 mc_rep_uni_w(12, 8, 24, sse4)
469 mc_rep_uni_w(12, 8, 32, sse4)
470 mc_rep_uni_w(12, 8, 48, sse4)
471 mc_rep_uni_w(12, 8, 64, sse4)
472 
/*
 * Define ff_hevc_put_hevc_bi_w<W>_<bitd>_<opt>() as a wrapper that calls
 * the <step>-wide routine ff_hevc_put_hevc_bi_w<step>_<bitd>_<opt>() once
 * per column offset 0, step, ... below W.
 *
 * The uint8_t destination is advanced by col * ((bitd + 7) / 8) elements,
 * both int16_t sources by `col` elements; height, denom and the four
 * _wx / _ox values are forwarded unchanged.
 */
#define mc_rep_bi_w(bitd, step, W, opt) \
void ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, \
        int16_t *_src2, int height, \
        int denom, int _wx0, int _wx1, int _ox0, int _ox1) \
{ \
    int col = 0; \
    while (col < W) { \
        ff_hevc_put_hevc_bi_w##step##_##bitd##_##opt(_dst + (col * ((bitd + 7) / 8)), dststride, \
                _src + col, _src2 + col, \
                height, denom, _wx0, _wx1, _ox0, _ox1); \
        col += step; \
    } \
}
490 
/*
 * sse4 ff_hevc_put_hevc_bi_w<W> wrappers for bit depths 8, 10 and 12:
 * width 12 is built from the 6-wide routine, widths 16..64 from the
 * 8-wide routine.
 */
491 mc_rep_bi_w(8, 6, 12, sse4)
492 mc_rep_bi_w(8, 8, 16, sse4)
493 mc_rep_bi_w(8, 8, 24, sse4)
494 mc_rep_bi_w(8, 8, 32, sse4)
495 mc_rep_bi_w(8, 8, 48, sse4)
496 mc_rep_bi_w(8, 8, 64, sse4)
497 
498 mc_rep_bi_w(10, 6, 12, sse4)
499 mc_rep_bi_w(10, 8, 16, sse4)
500 mc_rep_bi_w(10, 8, 24, sse4)
501 mc_rep_bi_w(10, 8, 32, sse4)
502 mc_rep_bi_w(10, 8, 48, sse4)
503 mc_rep_bi_w(10, 8, 64, sse4)
504 
505 mc_rep_bi_w(12, 6, 12, sse4)
506 mc_rep_bi_w(12, 8, 16, sse4)
507 mc_rep_bi_w(12, 8, 24, sse4)
508 mc_rep_bi_w(12, 8, 32, sse4)
509 mc_rep_bi_w(12, 8, 48, sse4)
510 mc_rep_bi_w(12, 8, 64, sse4)
511 
/*
 * Define ff_hevc_put_hevc_uni_w_<name><W>_<bitd>_<opt>() as a two-stage
 * routine:
 *   1. ff_hevc_put_hevc_<name><W>_<bitd>_<opt>() writes its int16_t output
 *      into a local scratch buffer of 71 * MAX_PB_SIZE elements, declared
 *      through LOCAL_ALIGNED_16 (both defined outside this file section);
 *   2. ff_hevc_put_hevc_uni_w<W>_<bitd>_<opt>() reads that buffer and
 *      writes to _dst using denom, _wx and _ox.
 */
#define mc_uni_w_func(name, bitd, W, opt) \
void ff_hevc_put_hevc_uni_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \
        uint8_t *_src, ptrdiff_t _srcstride, \
        int height, int denom, \
        int _wx, int _ox, \
        intptr_t mx, intptr_t my, int width) \
{ \
    LOCAL_ALIGNED_16(int16_t, scratch, [71 * MAX_PB_SIZE]); \
    ff_hevc_put_hevc_##name##W##_##bitd##_##opt(scratch, _src, _srcstride, \
            height, mx, my, width); \
    ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(_dst, _dststride, scratch, \
            height, denom, _wx, _ox); \
}
523 
/*
 * Emit the two-stage uni_w routine for the eight widths
 * 4, 8, 12, 16, 24, 32, 48 and 64. Width 6 is not part of this set.
 */
#define mc_uni_w_funcs(name, bitd, opt) \
    mc_uni_w_func(name, bitd, 4, opt) \
    mc_uni_w_func(name, bitd, 8, opt) \
    mc_uni_w_func(name, bitd, 12, opt) \
    mc_uni_w_func(name, bitd, 16, opt) \
    mc_uni_w_func(name, bitd, 24, opt) \
    mc_uni_w_func(name, bitd, 32, opt) \
    mc_uni_w_func(name, bitd, 48, opt) \
    mc_uni_w_func(name, bitd, 64, opt)
533 
/*
 * sse4 two-stage uni_w routines for bit depths 8, 10 and 12.
 * pel_pixels and the three epel kinds additionally get a width-6 variant;
 * the qpel kinds only get the eight widths emitted by mc_uni_w_funcs.
 */
534 mc_uni_w_funcs(pel_pixels, 8, sse4)
535 mc_uni_w_func(pel_pixels, 8, 6, sse4)
536 mc_uni_w_funcs(epel_h, 8, sse4)
537 mc_uni_w_func(epel_h, 8, 6, sse4)
538 mc_uni_w_funcs(epel_v, 8, sse4)
539 mc_uni_w_func(epel_v, 8, 6, sse4)
540 mc_uni_w_funcs(epel_hv, 8, sse4)
541 mc_uni_w_func(epel_hv, 8, 6, sse4)
542 mc_uni_w_funcs(qpel_h, 8, sse4)
543 mc_uni_w_funcs(qpel_v, 8, sse4)
544 mc_uni_w_funcs(qpel_hv, 8, sse4)
545 
546 mc_uni_w_funcs(pel_pixels, 10, sse4)
547 mc_uni_w_func(pel_pixels, 10, 6, sse4)
548 mc_uni_w_funcs(epel_h, 10, sse4)
549 mc_uni_w_func(epel_h, 10, 6, sse4)
550 mc_uni_w_funcs(epel_v, 10, sse4)
551 mc_uni_w_func(epel_v, 10, 6, sse4)
552 mc_uni_w_funcs(epel_hv, 10, sse4)
553 mc_uni_w_func(epel_hv, 10, 6, sse4)
554 mc_uni_w_funcs(qpel_h, 10, sse4)
555 mc_uni_w_funcs(qpel_v, 10, sse4)
556 mc_uni_w_funcs(qpel_hv, 10, sse4)
557 
558 mc_uni_w_funcs(pel_pixels, 12, sse4)
559 mc_uni_w_func(pel_pixels, 12, 6, sse4)
560 mc_uni_w_funcs(epel_h, 12, sse4)
561 mc_uni_w_func(epel_h, 12, 6, sse4)
562 mc_uni_w_funcs(epel_v, 12, sse4)
563 mc_uni_w_func(epel_v, 12, 6, sse4)
564 mc_uni_w_funcs(epel_hv, 12, sse4)
565 mc_uni_w_func(epel_hv, 12, 6, sse4)
566 mc_uni_w_funcs(qpel_h, 12, sse4)
567 mc_uni_w_funcs(qpel_v, 12, sse4)
568 mc_uni_w_funcs(qpel_hv, 12, sse4)
569 
/*
 * Define ff_hevc_put_hevc_bi_w_<name><W>_<bitd>_<opt>() as a two-stage
 * routine:
 *   1. ff_hevc_put_hevc_<name><W>_<bitd>_<opt>() writes its int16_t output
 *      into a local scratch buffer of 71 * MAX_PB_SIZE elements, declared
 *      through LOCAL_ALIGNED_16 (both defined outside this file section);
 *   2. ff_hevc_put_hevc_bi_w<W>_<bitd>_<opt>() reads that buffer together
 *      with _src2 and writes to _dst using denom and the _wx / _ox pairs.
 */
#define mc_bi_w_func(name, bitd, W, opt) \
void ff_hevc_put_hevc_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \
        uint8_t *_src, ptrdiff_t _srcstride, \
        int16_t *_src2, \
        int height, int denom, \
        int _wx0, int _wx1, int _ox0, int _ox1, \
        intptr_t mx, intptr_t my, int width) \
{ \
    LOCAL_ALIGNED_16(int16_t, scratch, [71 * MAX_PB_SIZE]); \
    ff_hevc_put_hevc_##name##W##_##bitd##_##opt(scratch, _src, _srcstride, \
            height, mx, my, width); \
    ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(_dst, _dststride, scratch, _src2, \
            height, denom, _wx0, _wx1, _ox0, _ox1); \
}
583 
/*
 * Emit the two-stage bi_w routine for the eight widths
 * 4, 8, 12, 16, 24, 32, 48 and 64. Width 6 is not part of this set.
 */
#define mc_bi_w_funcs(name, bitd, opt) \
    mc_bi_w_func(name, bitd, 4, opt) \
    mc_bi_w_func(name, bitd, 8, opt) \
    mc_bi_w_func(name, bitd, 12, opt) \
    mc_bi_w_func(name, bitd, 16, opt) \
    mc_bi_w_func(name, bitd, 24, opt) \
    mc_bi_w_func(name, bitd, 32, opt) \
    mc_bi_w_func(name, bitd, 48, opt) \
    mc_bi_w_func(name, bitd, 64, opt)
593 
/*
 * sse4 two-stage bi_w routines for bit depths 8, 10 and 12.
 * pel_pixels and the three epel kinds additionally get a width-6 variant;
 * the qpel kinds only get the eight widths emitted by mc_bi_w_funcs.
 */
594 mc_bi_w_funcs(pel_pixels, 8, sse4)
595 mc_bi_w_func(pel_pixels, 8, 6, sse4)
596 mc_bi_w_funcs(epel_h, 8, sse4)
597 mc_bi_w_func(epel_h, 8, 6, sse4)
598 mc_bi_w_funcs(epel_v, 8, sse4)
599 mc_bi_w_func(epel_v, 8, 6, sse4)
600 mc_bi_w_funcs(epel_hv, 8, sse4)
601 mc_bi_w_func(epel_hv, 8, 6, sse4)
602 mc_bi_w_funcs(qpel_h, 8, sse4)
603 mc_bi_w_funcs(qpel_v, 8, sse4)
604 mc_bi_w_funcs(qpel_hv, 8, sse4)
605 
606 mc_bi_w_funcs(pel_pixels, 10, sse4)
607 mc_bi_w_func(pel_pixels, 10, 6, sse4)
608 mc_bi_w_funcs(epel_h, 10, sse4)
609 mc_bi_w_func(epel_h, 10, 6, sse4)
610 mc_bi_w_funcs(epel_v, 10, sse4)
611 mc_bi_w_func(epel_v, 10, 6, sse4)
612 mc_bi_w_funcs(epel_hv, 10, sse4)
613 mc_bi_w_func(epel_hv, 10, 6, sse4)
614 mc_bi_w_funcs(qpel_h, 10, sse4)
615 mc_bi_w_funcs(qpel_v, 10, sse4)
616 mc_bi_w_funcs(qpel_hv, 10, sse4)
617 
618 mc_bi_w_funcs(pel_pixels, 12, sse4)
619 mc_bi_w_func(pel_pixels, 12, 6, sse4)
620 mc_bi_w_funcs(epel_h, 12, sse4)
621 mc_bi_w_func(epel_h, 12, 6, sse4)
622 mc_bi_w_funcs(epel_v, 12, sse4)
623 mc_bi_w_func(epel_v, 12, 6, sse4)
624 mc_bi_w_funcs(epel_hv, 12, sse4)
625 mc_bi_w_func(epel_hv, 12, 6, sse4)
626 mc_bi_w_funcs(qpel_h, 12, sse4)
627 mc_bi_w_funcs(qpel_v, 12, sse4)
628 mc_bi_w_funcs(qpel_hv, 12, sse4)
629 #endif //ARCH_X86_64 && HAVE_SSE4_EXTERNAL
630 
/*
 * Declare the five SAO band-filter prototypes
 * ff_hevc_sao_band_filter_{8,16,32,48,64}_<bitd>_<opt>() for one bit depth
 * and instruction-set suffix. All five share the same parameter list.
 * The expansion is self-terminated, so invocations carry no semicolon.
 */
#define SAO_BAND_FILTER_FUNCS(bitd, opt) \
void ff_hevc_sao_band_filter_8_##bitd##_##opt( \
    uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
    int16_t *sao_offset_val, int sao_left_class, int width, int height); \
void ff_hevc_sao_band_filter_16_##bitd##_##opt( \
    uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
    int16_t *sao_offset_val, int sao_left_class, int width, int height); \
void ff_hevc_sao_band_filter_32_##bitd##_##opt( \
    uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
    int16_t *sao_offset_val, int sao_left_class, int width, int height); \
void ff_hevc_sao_band_filter_48_##bitd##_##opt( \
    uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
    int16_t *sao_offset_val, int sao_left_class, int width, int height); \
void ff_hevc_sao_band_filter_64_##bitd##_##opt( \
    uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
    int16_t *sao_offset_val, int sao_left_class, int width, int height);
642 
/* SAO band-filter prototypes: bit depths 8, 10 and 12 for sse2, avx and avx2. */
643 SAO_BAND_FILTER_FUNCS(8, sse2)
644 SAO_BAND_FILTER_FUNCS(10, sse2)
645 SAO_BAND_FILTER_FUNCS(12, sse2)
646 SAO_BAND_FILTER_FUNCS(8, avx)
647 SAO_BAND_FILTER_FUNCS(10, avx)
648 SAO_BAND_FILTER_FUNCS(12, avx)
649 SAO_BAND_FILTER_FUNCS(8, avx2)
650 SAO_BAND_FILTER_FUNCS(10, avx2)
651 SAO_BAND_FILTER_FUNCS(12, avx2)
652 
/*
 * Store the five SAO band-filter routines of one bit depth / instruction
 * set into c->sao_band_filter[0..4], in the order 8, 16, 32, 48, 64.
 * Expects a pointer named `c` in the invoking scope. Wrapped in
 * do { } while (0) so it behaves as a single statement.
 */
#define SAO_BAND_INIT(bitd, opt) \
    do { \
        c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_##bitd##_##opt; \
        c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_##bitd##_##opt; \
        c->sao_band_filter[2] = ff_hevc_sao_band_filter_32_##bitd##_##opt; \
        c->sao_band_filter[3] = ff_hevc_sao_band_filter_48_##bitd##_##opt; \
        c->sao_band_filter[4] = ff_hevc_sao_band_filter_64_##bitd##_##opt; \
    } while (0)
660 
/*
 * Declare the five SAO edge-filter prototypes
 * ff_hevc_sao_edge_filter_{8,16,32,48,64}_<bitd>_<opt>() for one bit depth
 * and instruction-set suffix. All five share the same parameter list.
 * The expansion is self-terminated, so invocations carry no semicolon.
 *
 * The definition deliberately ends WITHOUT a trailing backslash: a
 * continuation on the last line would splice whatever follows the macro
 * into its replacement list.
 */
#define SAO_EDGE_FILTER_FUNCS(bitd, opt) \
void ff_hevc_sao_edge_filter_8_##bitd##_##opt( \
    uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val, \
    int eo, int width, int height); \
void ff_hevc_sao_edge_filter_16_##bitd##_##opt( \
    uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val, \
    int eo, int width, int height); \
void ff_hevc_sao_edge_filter_32_##bitd##_##opt( \
    uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val, \
    int eo, int width, int height); \
void ff_hevc_sao_edge_filter_48_##bitd##_##opt( \
    uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val, \
    int eo, int width, int height); \
void ff_hevc_sao_edge_filter_64_##bitd##_##opt( \
    uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val, \
    int eo, int width, int height);
672 
/*
 * SAO edge-filter prototypes: depth 8 for ssse3 and avx2, depths 10 and 12
 * for sse2 and avx2.
 */
673 SAO_EDGE_FILTER_FUNCS(8, ssse3)
674 SAO_EDGE_FILTER_FUNCS(8, avx2)
675 SAO_EDGE_FILTER_FUNCS(10, sse2)
676 SAO_EDGE_FILTER_FUNCS(10, avx2)
677 SAO_EDGE_FILTER_FUNCS(12, sse2)
678 SAO_EDGE_FILTER_FUNCS(12, avx2)
679 
/*
 * Store the five SAO edge-filter routines of one bit depth / instruction
 * set into c->sao_edge_filter[0..4], in the order 8, 16, 32, 48, 64.
 * Expects a pointer named `c` in the invoking scope. Wrapped in
 * do { } while (0) so it behaves as a single statement.
 */
#define SAO_EDGE_INIT(bitd, opt) \
    do { \
        c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_8_##bitd##_##opt; \
        c->sao_edge_filter[1] = ff_hevc_sao_edge_filter_16_##bitd##_##opt; \
        c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_##bitd##_##opt; \
        c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_##bitd##_##opt; \
        c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_##bitd##_##opt; \
    } while (0)
687 
/*
 * Expand to nine PEL_LINK() invocations for indices 1..9, pairing each
 * index with the function-name stem `fname` plus a width suffix:
 *   1->4, 2->6, 3->8, 4->12, 5->16, 6->24, 7->32, 8->48, 9->64.
 * pointer, my, mx, bitd and opt are passed through to every PEL_LINK().
 * PEL_LINK itself is not defined in this part of the file; what each
 * invocation stores is determined there.
 * The last invocation has no semicolon; the caller supplies it.
 */
688 #define EPEL_LINKS(pointer, my, mx, fname, bitd, opt ) \
689  PEL_LINK(pointer, 1, my , mx , fname##4 , bitd, opt ); \
690  PEL_LINK(pointer, 2, my , mx , fname##6 , bitd, opt ); \
691  PEL_LINK(pointer, 3, my , mx , fname##8 , bitd, opt ); \
692  PEL_LINK(pointer, 4, my , mx , fname##12, bitd, opt ); \
693  PEL_LINK(pointer, 5, my , mx , fname##16, bitd, opt ); \
694  PEL_LINK(pointer, 6, my , mx , fname##24, bitd, opt ); \
695  PEL_LINK(pointer, 7, my , mx , fname##32, bitd, opt ); \
696  PEL_LINK(pointer, 8, my , mx , fname##48, bitd, opt ); \
697  PEL_LINK(pointer, 9, my , mx , fname##64, bitd, opt )
/*
 * Expand to eight PEL_LINK() invocations, pairing an index with the
 * function-name stem `fname` plus a width suffix:
 *   1->4, 3->8, 4->12, 5->16, 6->24, 7->32, 8->48, 9->64.
 * Unlike EPEL_LINKS, index 2 (the width-6 entry) is not linked.
 * PEL_LINK itself is not defined in this part of the file.
 * The last invocation has no semicolon; the caller supplies it.
 */
698 #define QPEL_LINKS(pointer, my, mx, fname, bitd, opt) \
699  PEL_LINK(pointer, 1, my , mx , fname##4 , bitd, opt ); \
700  PEL_LINK(pointer, 3, my , mx , fname##8 , bitd, opt ); \
701  PEL_LINK(pointer, 4, my , mx , fname##12, bitd, opt ); \
702  PEL_LINK(pointer, 5, my , mx , fname##16, bitd, opt ); \
703  PEL_LINK(pointer, 6, my , mx , fname##24, bitd, opt ); \
704  PEL_LINK(pointer, 7, my , mx , fname##32, bitd, opt ); \
705  PEL_LINK(pointer, 8, my , mx , fname##48, bitd, opt ); \
706  PEL_LINK(pointer, 9, my , mx , fname##64, bitd, opt )
707 
709 {
710  int cpu_flags = av_get_cpu_flags();
711 
712  if (bit_depth == 8) {
713  if (EXTERNAL_MMXEXT(cpu_flags)) {
714  c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_mmxext;
715  c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_mmxext;
716 
718  }
719  if (EXTERNAL_SSE2(cpu_flags)) {
720  c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
721  c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
722  if (ARCH_X86_64) {
723  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2;
724  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2;
725 
726  c->idct[2] = ff_hevc_idct_16x16_8_sse2;
727  c->idct[3] = ff_hevc_idct_32x32_8_sse2;
728  }
729  SAO_BAND_INIT(8, sse2);
730 
731  c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_sse2;
732  c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_sse2;
733  c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_sse2;
734 
735  c->idct[0] = ff_hevc_idct_4x4_8_sse2;
736  c->idct[1] = ff_hevc_idct_8x8_8_sse2;
737 
741  }
742  if (EXTERNAL_SSSE3(cpu_flags)) {
743  if(ARCH_X86_64) {
744  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
745  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
746  }
747  SAO_EDGE_INIT(8, ssse3);
748  }
749  if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
750 
751  EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 8, sse4);
752  EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 8, sse4);
753  EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 8, sse4);
754  EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 8, sse4);
755 
756  QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 8, sse4);
757  QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 8, sse4);
758  QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 8, sse4);
759  QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 8, sse4);
760  }
761  if (EXTERNAL_AVX(cpu_flags)) {
762  c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_avx;
763  c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_avx;
764  if (ARCH_X86_64) {
765  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx;
766  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx;
767 
768  c->idct[2] = ff_hevc_idct_16x16_8_avx;
769  c->idct[3] = ff_hevc_idct_32x32_8_avx;
770  }
771  SAO_BAND_INIT(8, avx);
772 
773  c->idct[0] = ff_hevc_idct_4x4_8_avx;
774  c->idct[1] = ff_hevc_idct_8x8_8_avx;
775 
779  }
780  if (EXTERNAL_AVX2(cpu_flags)) {
781  c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_8_avx2;
782  c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_8_avx2;
783  }
784  if (EXTERNAL_AVX2_FAST(cpu_flags)) {
785  c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_avx2;
786  c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_avx2;
787  if (ARCH_X86_64) {
791 
795 
799 
803 
807 
811 
812  c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_8_avx2;
813  c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_8_avx2;
814  c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_8_avx2;
815 
816  c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_8_avx2;
817  c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_8_avx2;
818  c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_8_avx2;
819 
820  c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_8_avx2;
821  c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_8_avx2;
822  c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_8_avx2;
823 
824  c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_8_avx2;
825  c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_8_avx2;
826  c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_8_avx2;
827 
828  c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_8_avx2;
829  c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_8_avx2;
830  c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_8_avx2;
831 
832  c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_8_avx2;
833  c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_8_avx2;
834  c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_8_avx2;
835 
836  c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_8_avx2;
837  c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_8_avx2;
838  c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_8_avx2;
839 
840  c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_8_avx2;
841  c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_8_avx2;
842  c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_8_avx2;
843 
844  c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_8_avx2;
845  c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_8_avx2;
846  c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_8_avx2;
847 
848  c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_8_avx2;
849  c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_8_avx2;
850  c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_8_avx2;
851 
852  c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_8_avx2;
853  c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_8_avx2;
854  c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_8_avx2;
855 
856  c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_8_avx2;
857  c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_8_avx2;
858  c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_8_avx2;
859 
860  c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_8_avx2;
861  c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_8_avx2;
862  c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_8_avx2;
863 
864  c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_8_avx2;
865  c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_8_avx2;
866  c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_8_avx2;
867 
868  c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_8_avx2;
869  c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_8_avx2;
870  c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_8_avx2;
871  }
872  SAO_BAND_INIT(8, avx2);
873 
874  c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_8_avx2;
875  c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_8_avx2;
876  c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_8_avx2;
877 
879  }
880  } else if (bit_depth == 10) {
881  if (EXTERNAL_MMXEXT(cpu_flags)) {
883  c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_mmxext;
884  c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_mmxext;
885  }
886  if (EXTERNAL_SSE2(cpu_flags)) {
887  c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
888  c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;
889  if (ARCH_X86_64) {
890  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2;
891  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2;
892 
893  c->idct[2] = ff_hevc_idct_16x16_10_sse2;
894  c->idct[3] = ff_hevc_idct_32x32_10_sse2;
895  }
896  SAO_BAND_INIT(10, sse2);
897  SAO_EDGE_INIT(10, sse2);
898 
899  c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_sse2;
900  c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_sse2;
901  c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_sse2;
902 
903  c->idct[0] = ff_hevc_idct_4x4_10_sse2;
904  c->idct[1] = ff_hevc_idct_8x8_10_sse2;
905 
909  }
910  if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
911  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
912  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
913  }
914  if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
915  EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4);
916  EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 10, sse4);
917  EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 10, sse4);
918  EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 10, sse4);
919 
920  QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 10, sse4);
921  QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 10, sse4);
922  QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 10, sse4);
923  QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 10, sse4);
924  }
925  if (EXTERNAL_AVX(cpu_flags)) {
926  c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_avx;
927  c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_avx;
928  if (ARCH_X86_64) {
929  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx;
930  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx;
931 
932  c->idct[2] = ff_hevc_idct_16x16_10_avx;
933  c->idct[3] = ff_hevc_idct_32x32_10_avx;
934  }
935 
936  c->idct[0] = ff_hevc_idct_4x4_10_avx;
937  c->idct[1] = ff_hevc_idct_8x8_10_avx;
938 
939  SAO_BAND_INIT(10, avx);
940  }
941  if (EXTERNAL_AVX2(cpu_flags)) {
942  c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_10_avx2;
943  }
944  if (EXTERNAL_AVX2_FAST(cpu_flags)) {
945  c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_avx2;
946  c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_avx2;
947  if (ARCH_X86_64) {
953 
959 
965 
971 
982 
983  c->put_hevc_epel[5][0][1] = ff_hevc_put_hevc_epel_h16_10_avx2;
984  c->put_hevc_epel[6][0][1] = ff_hevc_put_hevc_epel_h24_10_avx2;
985  c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_10_avx2;
986  c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_10_avx2;
987  c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_10_avx2;
988 
989  c->put_hevc_epel_uni[5][0][1] = ff_hevc_put_hevc_uni_epel_h16_10_avx2;
990  c->put_hevc_epel_uni[6][0][1] = ff_hevc_put_hevc_uni_epel_h24_10_avx2;
991  c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_10_avx2;
992  c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_10_avx2;
993  c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_10_avx2;
994 
995  c->put_hevc_epel_bi[5][0][1] = ff_hevc_put_hevc_bi_epel_h16_10_avx2;
996  c->put_hevc_epel_bi[6][0][1] = ff_hevc_put_hevc_bi_epel_h24_10_avx2;
997  c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_10_avx2;
998  c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_10_avx2;
999  c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_10_avx2;
1000 
1001  c->put_hevc_epel[5][1][0] = ff_hevc_put_hevc_epel_v16_10_avx2;
1002  c->put_hevc_epel[6][1][0] = ff_hevc_put_hevc_epel_v24_10_avx2;
1003  c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_10_avx2;
1004  c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_10_avx2;
1005  c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_10_avx2;
1006 
1007  c->put_hevc_epel_uni[5][1][0] = ff_hevc_put_hevc_uni_epel_v16_10_avx2;
1008  c->put_hevc_epel_uni[6][1][0] = ff_hevc_put_hevc_uni_epel_v24_10_avx2;
1009  c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_10_avx2;
1010  c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_10_avx2;
1011  c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_10_avx2;
1012 
1013  c->put_hevc_epel_bi[5][1][0] = ff_hevc_put_hevc_bi_epel_v16_10_avx2;
1014  c->put_hevc_epel_bi[6][1][0] = ff_hevc_put_hevc_bi_epel_v24_10_avx2;
1015  c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_10_avx2;
1016  c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_10_avx2;
1017  c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_10_avx2;
1018 
1019  c->put_hevc_epel[5][1][1] = ff_hevc_put_hevc_epel_hv16_10_avx2;
1020  c->put_hevc_epel[6][1][1] = ff_hevc_put_hevc_epel_hv24_10_avx2;
1021  c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_10_avx2;
1022  c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_10_avx2;
1023  c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_10_avx2;
1024 
1025  c->put_hevc_epel_uni[5][1][1] = ff_hevc_put_hevc_uni_epel_hv16_10_avx2;
1026  c->put_hevc_epel_uni[6][1][1] = ff_hevc_put_hevc_uni_epel_hv24_10_avx2;
1027  c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_10_avx2;
1028  c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_10_avx2;
1029  c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_10_avx2;
1030 
1031  c->put_hevc_epel_bi[5][1][1] = ff_hevc_put_hevc_bi_epel_hv16_10_avx2;
1032  c->put_hevc_epel_bi[6][1][1] = ff_hevc_put_hevc_bi_epel_hv24_10_avx2;
1033  c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_10_avx2;
1034  c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_10_avx2;
1035  c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_10_avx2;
1036 
1037  c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_10_avx2;
1038  c->put_hevc_qpel[6][0][1] = ff_hevc_put_hevc_qpel_h24_10_avx2;
1039  c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_10_avx2;
1040  c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_10_avx2;
1041  c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_10_avx2;
1042 
1043  c->put_hevc_qpel_uni[5][0][1] = ff_hevc_put_hevc_uni_qpel_h16_10_avx2;
1044  c->put_hevc_qpel_uni[6][0][1] = ff_hevc_put_hevc_uni_qpel_h24_10_avx2;
1045  c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_10_avx2;
1046  c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_10_avx2;
1047  c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_10_avx2;
1048 
1049  c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_hevc_bi_qpel_h16_10_avx2;
1050  c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_hevc_bi_qpel_h24_10_avx2;
1051  c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_10_avx2;
1052  c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_10_avx2;
1053  c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_10_avx2;
1054 
1055  c->put_hevc_qpel[5][1][0] = ff_hevc_put_hevc_qpel_v16_10_avx2;
1056  c->put_hevc_qpel[6][1][0] = ff_hevc_put_hevc_qpel_v24_10_avx2;
1057  c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_10_avx2;
1058  c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_10_avx2;
1059  c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_10_avx2;
1060 
1061  c->put_hevc_qpel_uni[5][1][0] = ff_hevc_put_hevc_uni_qpel_v16_10_avx2;
1062  c->put_hevc_qpel_uni[6][1][0] = ff_hevc_put_hevc_uni_qpel_v24_10_avx2;
1063  c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_10_avx2;
1064  c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_10_avx2;
1065  c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_10_avx2;
1066 
1067  c->put_hevc_qpel_bi[5][1][0] = ff_hevc_put_hevc_bi_qpel_v16_10_avx2;
1068  c->put_hevc_qpel_bi[6][1][0] = ff_hevc_put_hevc_bi_qpel_v24_10_avx2;
1069  c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_10_avx2;
1070  c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_10_avx2;
1071  c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_10_avx2;
1072 
1073  c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_10_avx2;
1074  c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_10_avx2;
1075  c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_10_avx2;
1076  c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_10_avx2;
1077  c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_10_avx2;
1078 
1079  c->put_hevc_qpel_uni[5][1][1] = ff_hevc_put_hevc_uni_qpel_hv16_10_avx2;
1080  c->put_hevc_qpel_uni[6][1][1] = ff_hevc_put_hevc_uni_qpel_hv24_10_avx2;
1081  c->put_hevc_qpel_uni[7][1][1] = ff_hevc_put_hevc_uni_qpel_hv32_10_avx2;
1082  c->put_hevc_qpel_uni[8][1][1] = ff_hevc_put_hevc_uni_qpel_hv48_10_avx2;
1083  c->put_hevc_qpel_uni[9][1][1] = ff_hevc_put_hevc_uni_qpel_hv64_10_avx2;
1084 
1085  c->put_hevc_qpel_bi[5][1][1] = ff_hevc_put_hevc_bi_qpel_hv16_10_avx2;
1086  c->put_hevc_qpel_bi[6][1][1] = ff_hevc_put_hevc_bi_qpel_hv24_10_avx2;
1087  c->put_hevc_qpel_bi[7][1][1] = ff_hevc_put_hevc_bi_qpel_hv32_10_avx2;
1088  c->put_hevc_qpel_bi[8][1][1] = ff_hevc_put_hevc_bi_qpel_hv48_10_avx2;
1089  c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_hevc_bi_qpel_hv64_10_avx2;
1090  }
1091  SAO_BAND_INIT(10, avx2);
1092  SAO_EDGE_INIT(10, avx2);
1093 
1096  }
1097  } else if (bit_depth == 12) {
1098  if (EXTERNAL_MMXEXT(cpu_flags)) {
1099  c->idct_dc[0] = ff_hevc_idct_4x4_dc_12_mmxext;
1100  c->idct_dc[1] = ff_hevc_idct_8x8_dc_12_mmxext;
1101  }
1102  if (EXTERNAL_SSE2(cpu_flags)) {
1103  c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2;
1104  c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2;
1105  if (ARCH_X86_64) {
1106  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2;
1107  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2;
1108  }
1109  SAO_BAND_INIT(12, sse2);
1110  SAO_EDGE_INIT(12, sse2);
1111 
1112  c->idct_dc[1] = ff_hevc_idct_8x8_dc_12_sse2;
1113  c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_sse2;
1114  c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_sse2;
1115  }
1116  if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
1117  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3;
1118  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3;
1119  }
1120  if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
1121  EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 12, sse4);
1122  EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 12, sse4);
1123  EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 12, sse4);
1124  EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 12, sse4);
1125 
1126  QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 12, sse4);
1127  QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 12, sse4);
1128  QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 12, sse4);
1129  QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 12, sse4);
1130  }
1131  if (EXTERNAL_AVX(cpu_flags)) {
1132  c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_avx;
1133  c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_avx;
1134  if (ARCH_X86_64) {
1135  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_avx;
1136  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_avx;
1137  }
1138  SAO_BAND_INIT(12, avx);
1139  }
1140  if (EXTERNAL_AVX2(cpu_flags)) {
1141  c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_12_avx2;
1142  }
1143  if (EXTERNAL_AVX2_FAST(cpu_flags)) {
1144  c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_avx2;
1145  c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_avx2;
1146 
1147  SAO_BAND_INIT(12, avx2);
1148  SAO_EDGE_INIT(12, avx2);
1149  }
1150  }
1151 }
void(* hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride, int32_t *tc, uint8_t *no_p, uint8_t *no_q)
Definition: hevcdsp.h:109
void(* hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride, int beta, int32_t *tc, uint8_t *no_p, uint8_t *no_q)
Definition: hevcdsp.h:101
void ff_hevc_add_residual_16_8_avx(uint8_t *dst, int16_t *res, ptrdiff_t stride)
void ff_hevc_put_hevc_pel_pixels16_10_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
void ff_hevc_put_hevc_bi_pel_pixels48_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
void ff_hevc_add_residual_32_10_avx2(uint8_t *dst, int16_t *res, ptrdiff_t stride)
#define SAO_EDGE_INIT(bitd, opt)
Definition: hevcdsp_init.c:680
void(* put_hevc_epel_uni[10][2][2])(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp.h:89
static atomic_int cpu_flags
Definition: cpu.c:50
#define LFL_FUNCS(type, depth, opt)
Definition: hevcdsp_init.c:41
void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
Definition: hevcdsp_init.c:708
#define SAO_EDGE_FILTER_FUNCS(bitd, opt)
Definition: hevcdsp_init.c:661
void(* idct[4])(int16_t *coeffs, int col_limit)
Definition: hevcdsp.h:57
void ff_hevc_put_hevc_uni_pel_pixels128_8_avx2(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
void ff_hevc_put_hevc_pel_pixels64_8_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
#define EXTERNAL_SSE4(flags)
Definition: cpu.h:68
void(* put_hevc_epel[10][2][2])(int16_t *dst, uint8_t *src, ptrdiff_t srcstride, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp.h:86
void ff_hevc_put_hevc_bi_pel_pixels32_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
uint8_t
#define mc_rep_func(name, bitd, step, W, opt)
Definition: hevcdsp_init.c:87
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can lead, and in some cases has led, to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c... (excerpt truncated)
Definition: undefined.txt:32
#define mc_rep_funcs2(name, bitd, step1, step2, W, opt)
Definition: hevcdsp_init.c:169
bitstream reader API header.
void ff_hevc_put_hevc_pel_pixels32_8_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
void ff_hevc_add_residual_8_8_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride)
void ff_hevc_put_hevc_uni_pel_pixels48_8_avx2(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
void ff_hevc_put_hevc_pel_pixels64_10_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
#define EXTERNAL_SSE2(flags)
Definition: cpu.h:59
void(* put_hevc_qpel[10][2][2])(int16_t *dst, uint8_t *src, ptrdiff_t srcstride, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp.h:72
void ff_hevc_add_residual_32_8_avx(uint8_t *dst, int16_t *res, ptrdiff_t stride)
void ff_hevc_put_hevc_pel_pixels48_10_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
#define EXTERNAL_AVX2_FAST(flags)
Definition: cpu.h:79
void ff_hevc_add_residual_8_10_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride)
void ff_hevc_add_residual_4_8_mmxext(uint8_t *dst, int16_t *res, ptrdiff_t stride)
#define mc_rep_funcs(name, bitd, step, W, opt)
Definition: hevcdsp_init.c:134
void(* sao_edge_filter[5])(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val, int sao_eo_class, int width, int height)
Definition: hevcdsp.h:65
#define EPEL_LINKS(pointer, my, mx, fname, bitd, opt)
Definition: hevcdsp_init.c:688
void ff_hevc_put_hevc_bi_pel_pixels32_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
void ff_hevc_put_hevc_uni_pel_pixels32_8_avx2(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
void ff_hevc_put_hevc_bi_pel_pixels24_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
static void bit_depth(AudioStatsContext *s, uint64_t mask, uint64_t imask, AVRational *depth)
Definition: af_astats.c:226
void ff_hevc_add_residual_16_10_avx2(uint8_t *dst, int16_t *res, ptrdiff_t stride)
#define SAO_BAND_FILTER_FUNCS(bitd, opt)
Definition: hevcdsp_init.c:631
void(* idct_dc[4])(int16_t *coeffs)
Definition: hevcdsp.h:59
#define LFC_FUNCS(type, depth, opt)
Definition: hevcdsp_init.c:37
void(* put_hevc_qpel_bi[10][2][2])(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp.h:79
#define mc_rep_uni_func(name, bitd, step, W, opt)
Definition: hevcdsp_init.c:101
void ff_hevc_add_residual_32_10_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride)
void(* hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride, int beta, int32_t *tc, uint8_t *no_p, uint8_t *no_q)
Definition: hevcdsp.h:104
void(* sao_band_filter[5])(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, int16_t *sao_offset_val, int sao_left_class, int width, int height)
Definition: hevcdsp.h:61
void ff_hevc_add_residual_8_8_avx(uint8_t *dst, int16_t *res, ptrdiff_t stride)
#define EXTERNAL_SSSE3(flags)
Definition: cpu.h:65
void ff_hevc_add_residual_4_10_mmxext(uint8_t *dst, int16_t *res, ptrdiff_t stride)
void(* add_residual[4])(uint8_t *dst, int16_t *res, ptrdiff_t stride)
Definition: hevcdsp.h:49
void ff_hevc_add_residual_32_8_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride)
void ff_hevc_put_hevc_uni_pel_pixels96_8_avx2(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
void ff_hevc_put_hevc_pel_pixels24_10_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
void ff_hevc_put_hevc_bi_pel_pixels16_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
void(* put_hevc_qpel_uni[10][2][2])(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp.h:74
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:93
#define EXTERNAL_AVX2(flags)
Definition: cpu.h:78
#define EXTERNAL_MMXEXT(flags)
Definition: cpu.h:57
void(* hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride, int32_t *tc, uint8_t *no_p, uint8_t *no_q)
Definition: hevcdsp.h:107
void ff_hevc_put_hevc_pel_pixels32_10_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
void ff_hevc_add_residual_16_8_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride)
void ff_hevc_put_hevc_bi_pel_pixels64_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
#define SAO_BAND_INIT(bitd, opt)
Definition: hevcdsp_init.c:653
#define mc_rep_bi_func(name, bitd, step, W, opt)
Definition: hevcdsp_init.c:116
void ff_hevc_put_hevc_pel_pixels48_8_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
void ff_hevc_put_hevc_bi_pel_pixels64_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
void(* put_hevc_epel_bi[10][2][2])(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp.h:93
#define IDCT_DC_FUNCS(W, opt)
Definition: hevcdsp_init.c:61
void ff_hevc_add_residual_32_8_avx2(uint8_t *dst, int16_t *res, ptrdiff_t stride)
#define IDCT_FUNCS(opt)
Definition: hevcdsp_init.c:74
#define QPEL_LINKS(pointer, my, mx, fname, bitd, opt)
Definition: hevcdsp_init.c:698
#define EXTERNAL_AVX(flags)
Definition: cpu.h:70
void ff_hevc_add_residual_16_10_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride)
void ff_hevc_put_hevc_bi_pel_pixels48_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
void ff_hevc_put_hevc_uni_pel_pixels64_8_avx2(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)