FFmpeg
dsp_init.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2013 Seppo Tomperi
3  * Copyright (c) 2013-2014 Pierre-Edouard Lepere
4  * Copyright (c) 2023-2024 Wu Jianhua
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 #include "config.h"
24 
25 #include "libavutil/cpu.h"
26 #include "libavutil/mem_internal.h"
27 #include "libavutil/x86/asm.h"
28 #include "libavutil/x86/cpu.h"
29 #include "libavcodec/hevc/dsp.h"
30 #include "libavcodec/x86/hevcdsp.h"
32 
/*
 * Emit the prototype of one chroma loop-filter routine,
 * ff_hevc_<DIR>_loop_filter_chroma_<DEPTH>_<OPT>().
 * DIR, DEPTH and OPT are pasted verbatim into the symbol name, so they must
 * be plain tokens (e.g. h, 8, sse2), not expressions.
 */
#define LFC_FUNC(DIR, DEPTH, OPT) \
void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, \
                                                                  const int *tc, \
                                                                  const uint8_t *no_p, const uint8_t *no_q);
35 
/*
 * Emit the prototype of one luma loop-filter routine,
 * ff_hevc_<DIR>_loop_filter_luma_<DEPTH>_<OPT>().
 * Compared with the chroma prototype it takes an additional int beta
 * argument before tc.  DIR, DEPTH and OPT are pasted into the symbol name.
 */
#define LFL_FUNC(DIR, DEPTH, OPT) \
void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, \
                                                                int beta, const int *tc, \
                                                                const uint8_t *no_p, const uint8_t *no_q);
38 
/*
 * Declare both directions (h and v) of the chroma loop filter for one
 * bit depth / instruction set.  The first parameter (type) is accepted but
 * not used by the expansion; it is kept so existing invocations still work.
 */
#define LFC_FUNCS(type, depth, opt) \
    LFC_FUNC(h, depth, opt)         \
    LFC_FUNC(v, depth, opt)
42 
/*
 * Declare both directions (h and v) of the luma loop filter for one
 * bit depth / instruction set.  The first parameter (type) is accepted but
 * not used by the expansion; it is kept so existing invocations still work.
 */
#define LFL_FUNCS(type, depth, opt) \
    LFL_FUNC(h, depth, opt)         \
    LFL_FUNC(v, depth, opt)
46 
47 LFC_FUNCS(uint8_t, 8, sse2)
48 LFC_FUNCS(uint8_t, 10, sse2)
49 LFC_FUNCS(uint8_t, 12, sse2)
50 LFC_FUNCS(uint8_t, 8, avx)
51 LFC_FUNCS(uint8_t, 10, avx)
52 LFC_FUNCS(uint8_t, 12, avx)
53 LFL_FUNCS(uint8_t, 8, sse2)
54 LFL_FUNCS(uint8_t, 10, sse2)
55 LFL_FUNCS(uint8_t, 12, sse2)
56 LFL_FUNCS(uint8_t, 8, ssse3)
57 LFL_FUNCS(uint8_t, 10, ssse3)
58 LFL_FUNCS(uint8_t, 12, ssse3)
59 LFL_FUNCS(uint8_t, 8, avx)
60 LFL_FUNCS(uint8_t, 10, avx)
61 LFL_FUNCS(uint8_t, 12, avx)
62 
/*
 * Declare the three idct_dc routines (8, 10 and 12 bit) for block size W:
 * ff_hevc_idct_<W>_dc_{8,10,12}_<opt>(int16_t *coeffs).
 * The last prototype deliberately has no trailing semicolon, so every use
 * of the macro must be followed by one.
 */
#define IDCT_DC_FUNCS(W, opt) \
void ff_hevc_idct_ ## W ## _dc_8_  ## opt(int16_t *coeffs); \
void ff_hevc_idct_ ## W ## _dc_10_ ## opt(int16_t *coeffs); \
void ff_hevc_idct_ ## W ## _dc_12_ ## opt(int16_t *coeffs)

IDCT_DC_FUNCS(4x4,   mmxext);
IDCT_DC_FUNCS(8x8,   sse2);
IDCT_DC_FUNCS(16x16, sse2);
IDCT_DC_FUNCS(32x32, sse2);
IDCT_DC_FUNCS(16x16, avx2);
IDCT_DC_FUNCS(32x32, avx2);
74 
/*
 * Declare the idct routines for one instruction set:
 * ff_hevc_idct_{4x4,8x8,16x16,32x32}_{8,10}_<opt>(int16_t *coeffs, int col_limit).
 * Unlike IDCT_DC_FUNCS, this macro ends with a semicolon itself, so it is
 * used without one.
 */
#define IDCT_FUNCS(opt) \
void ff_hevc_idct_4x4_8_    ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_4x4_10_   ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_8x8_8_    ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_8x8_10_   ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_16x16_8_  ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_16x16_10_ ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_32x32_8_  ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_32x32_10_ ## opt(int16_t *coeffs, int col_limit);

IDCT_FUNCS(sse2)
IDCT_FUNCS(avx)
87 
88 
/*
 * DECL_HV_FILTER(f) declares two locals in the enclosing function:
 *   hf = ff_hevc_<f>_filters[mx]   (row selected by mx)
 *   vf = ff_hevc_<f>_filters[my]   (row selected by my)
 * mx and my must already be in scope.  There is no table named
 * ff_hevc_pel_filters, so that name is aliased to the qpel table to keep
 * DECL_HV_FILTER(pel) well-formed.
 */
#define ff_hevc_pel_filters ff_hevc_qpel_filters
#define DECL_HV_FILTER(f)                                  \
    const uint8_t *hf = ff_hevc_ ## f ## _filters[mx];     \
    const uint8_t *vf = ff_hevc_ ## f ## _filters[my];
93 
/*
 * Define ff_hevc_put_hevc_<a>_<depth>_<opt>() as a thin forwarder to
 * ff_h2656_put_<b>_<depth>_<opt>().
 *   p - filter family handed to DECL_HV_FILTER (selects the hf/vf tables)
 *   a - HEVC-side name fragment, b - h2656-side name fragment
 * The wrapper looks up the filter rows for mx/my and passes a fixed
 * destination stride of 2 * MAX_PB_SIZE.
 */
#define FW_PUT(p, a, b, depth, opt) \
void ff_hevc_put_hevc_ ## a ## _ ## depth ## _ ## opt(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, \
                                                      int height, intptr_t mx, intptr_t my, int width) \
{ \
    DECL_HV_FILTER(p) \
    ff_h2656_put_ ## b ## _ ## depth ## _ ## opt(dst, 2 * MAX_PB_SIZE, src, srcstride, height, hf, vf, width); \
}
101 
/*
 * Define ff_hevc_put_hevc_uni_<a>_<depth>_<opt>() as a thin forwarder to
 * ff_h2656_put_uni_<b>_<depth>_<opt>().
 *   p - filter family handed to DECL_HV_FILTER (selects the hf/vf tables)
 *   a - HEVC-side name fragment, b - h2656-side name fragment
 * Unlike FW_PUT, the caller-supplied dststride is forwarded unchanged.
 */
#define FW_PUT_UNI(p, a, b, depth, opt) \
void ff_hevc_put_hevc_uni_ ## a ## _ ## depth ## _ ## opt(uint8_t *dst, ptrdiff_t dststride, \
                                                          const uint8_t *src, ptrdiff_t srcstride, \
                                                          int height, intptr_t mx, intptr_t my, int width) \
{ \
    DECL_HV_FILTER(p) \
    ff_h2656_put_uni_ ## b ## _ ## depth ## _ ## opt(dst, dststride, src, srcstride, height, hf, vf, width); \
}
110 
111 #if ARCH_X86_64 && HAVE_SSE4_EXTERNAL
112 
/* Generate both the plain and the uni forwarder for one name pair. */
#define FW_PUT_FUNCS(p, a, b, depth, opt) \
    FW_PUT(p, a, b, depth, opt)           \
    FW_PUT_UNI(p, a, b, depth, opt)

/* Forwarders pel_pixels<w> -> pixels<w>. */
#define FW_PEL(w, depth, opt) FW_PUT_FUNCS(pel, pel_pixels##w, pixels##w, depth, opt)

/*
 * Forwarders for the horizontal and vertical variants:
 * <npel>_h<w> -> <n>tap_h<w> and <npel>_v<w> -> <n>tap_v<w>.
 */
#define FW_DIR(npel, n, w, depth, opt) \
    FW_PUT_FUNCS(npel, npel ## _h##w, n ## tap_h##w, depth, opt) \
    FW_PUT_FUNCS(npel, npel ## _v##w, n ## tap_v##w, depth, opt)

/* Forwarders for the combined variant: <npel>_hv<w> -> <n>tap_hv<w>. */
#define FW_DIR_HV(npel, n, w, depth, opt) \
    FW_PUT_FUNCS(npel, npel ## _hv##w, n ## tap_hv##w, depth, opt)
125 
126 FW_PEL(4, 8, sse4)
127 FW_PEL(6, 8, sse4)
128 FW_PEL(8, 8, sse4)
129 FW_PEL(12, 8, sse4)
130 FW_PEL(16, 8, sse4)
131 FW_PEL(4, 10, sse4)
132 FW_PEL(6, 10, sse4)
133 FW_PEL(8, 10, sse4)
134 FW_PEL(4, 12, sse4)
135 FW_PEL(6, 12, sse4)
136 FW_PEL(8, 12, sse4)
137 
138 #define FW_EPEL(w, depth, opt) FW_DIR(epel, 4, w, depth, opt)
139 #define FW_EPEL_HV(w, depth, opt) FW_DIR_HV(epel, 4, w, depth, opt)
140 #define FW_EPEL_FUNCS(w, depth, opt) \
141  FW_EPEL(w, depth, opt) \
142  FW_EPEL_HV(w, depth, opt)
143 
144 FW_EPEL(12, 8, sse4)
145 
146 FW_EPEL_FUNCS(4, 8, sse4)
147 FW_EPEL_FUNCS(6, 8, sse4)
148 FW_EPEL_FUNCS(8, 8, sse4)
149 FW_EPEL_FUNCS(16, 8, sse4)
150 FW_EPEL_FUNCS(4, 10, sse4)
151 FW_EPEL_FUNCS(6, 10, sse4)
152 FW_EPEL_FUNCS(8, 10, sse4)
153 FW_EPEL_FUNCS(4, 12, sse4)
154 FW_EPEL_FUNCS(6, 12, sse4)
155 FW_EPEL_FUNCS(8, 12, sse4)
156 
157 #define FW_QPEL(w, depth, opt) FW_DIR(qpel, 8, w, depth, opt)
158 #define FW_QPEL_HV(w, depth, opt) FW_DIR_HV(qpel, 8, w, depth, opt)
159 #define FW_QPEL_FUNCS(w, depth, opt) \
160  FW_QPEL(w, depth, opt) \
161  FW_QPEL_HV(w, depth, opt)
162 
163 FW_QPEL(12, 8, sse4)
164 FW_QPEL(16, 8, sse4)
165 
166 FW_QPEL_FUNCS(4, 8, sse4)
167 FW_QPEL_FUNCS(8, 8, sse4)
168 FW_QPEL_FUNCS(4, 10, sse4)
169 FW_QPEL_FUNCS(8, 10, sse4)
170 FW_QPEL_FUNCS(4, 12, sse4)
171 FW_QPEL_FUNCS(8, 12, sse4)
172 
#if HAVE_AVX2_EXTERNAL

/* AVX2 forwarders: width 32 at 8 bit, width 16 at 10 bit. */
FW_PEL(32, 8, avx2)
/* 10-bit pel_pixels16 gets only the plain forwarder (no uni variant). */
FW_PUT(pel, pel_pixels16, pixels16, 10, avx2)

FW_EPEL(32,  8, avx2)
FW_EPEL(16, 10, avx2)

FW_EPEL_HV(32,  8, avx2)
FW_EPEL_HV(16, 10, avx2)

FW_QPEL(32,  8, avx2)
FW_QPEL(16, 10, avx2)

/* qpel hv: only the 10-bit width-16 forwarder is generated for AVX2. */
FW_QPEL_HV(16, 10, avx2)

#endif
190 #endif
191 
/*
 * Define ff_hevc_put_hevc_<name><W>_<bitd>_<opt>() by calling the narrower
 * ff_hevc_put_hevc_<name><step>_<bitd>_<opt>() once per step-wide column
 * strip, for i = 0, step, 2*step, ... while i < W.
 * The source pointer advances by i * ((bitd + 7) / 8) bytes (1 byte per
 * sample at 8 bit, 2 at 10/12 bit); the int16_t destination advances by i
 * elements.  height, mx, my and width are forwarded unchanged.
 */
#define mc_rep_func(name, bitd, step, W, opt) \
void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *_dst, \
                                                const uint8_t *_src, ptrdiff_t _srcstride, int height, \
                                                intptr_t mx, intptr_t my, int width) \
{ \
    int i; \
    for (i = 0; i < W; i += step) { \
        const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \
        int16_t       *dst = _dst + i; \
        ff_hevc_put_hevc_##name##step##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width); \
    } \
}
/*
 * Define ff_hevc_put_hevc_uni_<name><W>_<bitd>_<opt>() by calling the
 * narrower ff_hevc_put_hevc_uni_<name><step>_<bitd>_<opt>() once per
 * step-wide column strip (i = 0, step, ... while i < W).
 * Both the source and the uint8_t destination advance by
 * i * ((bitd + 7) / 8) bytes; all other arguments are forwarded unchanged.
 */
#define mc_rep_uni_func(name, bitd, step, W, opt) \
void ff_hevc_put_hevc_uni_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, \
                                                    const uint8_t *_src, ptrdiff_t _srcstride, int height, \
                                                    intptr_t mx, intptr_t my, int width) \
{ \
    int i; \
    for (i = 0; i < W; i += step) { \
        const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \
        uint8_t       *dst = _dst + (i * ((bitd + 7) / 8)); \
        ff_hevc_put_hevc_uni_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, \
                                                          height, mx, my, width); \
    } \
}
/*
 * Define ff_hevc_put_hevc_bi_<name><W>_<bitd>_<opt>() by calling the
 * narrower ff_hevc_put_hevc_bi_<name><step>_<bitd>_<opt>() once per
 * step-wide column strip (i = 0, step, ... while i < W).
 * The source and the uint8_t destination advance by i * ((bitd + 7) / 8)
 * bytes; the int16_t second source advances by i elements.  All other
 * arguments are forwarded unchanged.
 */
#define mc_rep_bi_func(name, bitd, step, W, opt) \
void ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const uint8_t *_src, \
                                                   ptrdiff_t _srcstride, const int16_t *_src2, \
                                                   int height, intptr_t mx, intptr_t my, int width) \
{ \
    int i; \
    for (i = 0; i < W; i += step) { \
        const uint8_t *src  = _src  + (i * ((bitd + 7) / 8)); \
        const int16_t *src2 = _src2 + i; \
        uint8_t       *dst  = _dst  + (i * ((bitd + 7) / 8)); \
        ff_hevc_put_hevc_bi_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, \
                                                         height, mx, my, width); \
    } \
}
234 
/* Generate the plain, uni and bi tiled wrappers for one (name, bitd, step, W, opt). */
#define mc_rep_funcs(name, bitd, step, W, opt) \
    mc_rep_func(name, bitd, step, W, opt)      \
    mc_rep_uni_func(name, bitd, step, W, opt)  \
    mc_rep_bi_func(name, bitd, step, W, opt)
239 
/*
 * Define ff_hevc_put_hevc_<name><W>_<bitd>_<opt>() as two calls of unequal
 * width: the <step1>-wide routine at column 0, then the <step2>-wide
 * routine at column step1 (destination offset step1 int16_t elements,
 * source offset step1 * ((bitd + 7) / 8) bytes).  W is used only to form
 * the generated function's name.
 */
#define mc_rep_func2(name, bitd, step1, step2, W, opt) \
void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *dst, \
                                                const uint8_t *src, ptrdiff_t _srcstride, int height, \
                                                intptr_t mx, intptr_t my, int width) \
{ \
    ff_hevc_put_hevc_##name##step1##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width); \
    ff_hevc_put_hevc_##name##step2##_##bitd##_##opt(dst + step1, src + (step1 * ((bitd + 7) / 8)), \
                                                   _srcstride, height, mx, my, width); \
}
/*
 * Define ff_hevc_put_hevc_uni_<name><W>_<bitd>_<opt>() as two calls of
 * unequal width: the <step1>-wide routine at column 0, then the
 * <step2>-wide routine at column step1.  Source and destination are both
 * byte pointers and are both offset by step1 * ((bitd + 7) / 8).  W is used
 * only to form the generated function's name.
 */
#define mc_rep_uni_func2(name, bitd, step1, step2, W, opt) \
void ff_hevc_put_hevc_uni_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, \
                                                    const uint8_t *src, ptrdiff_t _srcstride, int height, \
                                                    intptr_t mx, intptr_t my, int width) \
{ \
    ff_hevc_put_hevc_uni_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, height, mx, my, width); \
    ff_hevc_put_hevc_uni_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride, \
                                                       src + (step1 * ((bitd + 7) / 8)), _srcstride, \
                                                       height, mx, my, width); \
}
/*
 * Define ff_hevc_put_hevc_bi_<name><W>_<bitd>_<opt>() as two calls of
 * unequal width: the <step1>-wide routine at column 0, then the
 * <step2>-wide routine at column step1.  dst and src are offset by
 * step1 * ((bitd + 7) / 8) bytes, the int16_t src2 by step1 elements.
 * W is used only to form the generated function's name.
 */
#define mc_rep_bi_func2(name, bitd, step1, step2, W, opt) \
void ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
                                                   ptrdiff_t _srcstride, const int16_t *src2, \
                                                   int height, intptr_t mx, intptr_t my, int width) \
{ \
    ff_hevc_put_hevc_bi_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, height, mx, my, width); \
    ff_hevc_put_hevc_bi_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride, \
                                                      src + (step1 * ((bitd + 7) / 8)), _srcstride, \
                                                      src2 + step1, height, mx, my, width); \
}
269 
/* Generate the plain, uni and bi two-call wrappers for one parameter set. */
#define mc_rep_funcs2(name, bitd, step1, step2, W, opt) \
    mc_rep_func2(name, bitd, step1, step2, W, opt)      \
    mc_rep_uni_func2(name, bitd, step1, step2, W, opt)  \
    mc_rep_bi_func2(name, bitd, step1, step2, W, opt)
274 
275 #if ARCH_X86_64 && HAVE_SSE4_EXTERNAL
276 
/*
 * Define the 10-bit ff_hevc_put_hevc_<name><width1>_10_<opt1>() from two
 * routines of different instruction sets: the <width2>-wide <opt1> routine
 * at column 0 and the <width3>-wide <opt2> routine right after it.
 * The int16_t destination is advanced by width2 elements, the uint8_t
 * source by width4, i.e. width4 is a byte offset supplied by the caller.
 */
#define mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
void ff_hevc_put_hevc_##name##width1##_10_##opt1(int16_t *dst, const uint8_t *src, ptrdiff_t _srcstride, \
                                                int height, intptr_t mx, intptr_t my, int width) \
{ \
    ff_hevc_put_hevc_##name##width2##_10_##opt1(dst, src, _srcstride, height, mx, my, width); \
    ff_hevc_put_hevc_##name##width3##_10_##opt2(dst + width2, src + (width4), _srcstride, height, mx, my, width); \
}
285 
/*
 * Define the 10-bit ff_hevc_put_hevc_bi_<name><width1>_10_<opt1>() from the
 * <width2>-wide <opt1> routine at column 0 and the <width3>-wide <opt2>
 * routine right after it.  dst and src are byte pointers and are advanced
 * by width4 (a byte offset supplied by the caller); the int16_t src2 is
 * advanced by width2 elements.
 */
#define mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
void ff_hevc_put_hevc_bi_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
                                                   ptrdiff_t _srcstride, const int16_t *src2, \
                                                   int height, intptr_t mx, intptr_t my, int width) \
{ \
    ff_hevc_put_hevc_bi_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, src2, \
                                                  height, mx, my, width); \
    ff_hevc_put_hevc_bi_##name##width3##_10_##opt2(dst + (width4), dststride, src + (width4), _srcstride, \
                                                  src2 + width2, height, mx, my, width); \
}
296 
/*
 * Define the 10-bit ff_hevc_put_hevc_uni_<name><width1>_10_<opt1>() from
 * the <width2>-wide <opt1> routine at column 0 and the <width3>-wide <opt2>
 * routine right after it.  dst and src are byte pointers and are both
 * advanced by width4, a byte offset supplied by the caller.
 */
#define mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
void ff_hevc_put_hevc_uni_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, \
                                                    const uint8_t *src, ptrdiff_t _srcstride, int height, \
                                                    intptr_t mx, intptr_t my, int width) \
{ \
    ff_hevc_put_hevc_uni_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, \
                                                   height, mx, my, width); \
    ff_hevc_put_hevc_uni_##name##width3##_10_##opt2(dst + (width4), dststride, src + (width4), _srcstride, \
                                                   height, mx, my, width); \
}
307 
/* Generate the plain, bi and uni mixed-instruction-set 10-bit wrappers. */
#define mc_rep_mixs_10(name, width1, width2, width3, opt1, opt2, width4) \
    mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)      \
    mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)   \
    mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)
312 
/*
 * Define the 8-bit ff_hevc_put_hevc_<name><width1>_8_<opt1>() from two
 * routines of different instruction sets: the <width2>-wide <opt1> routine
 * at column 0 and the <width3>-wide <opt2> routine at column width2.
 * The same offset, width2, is applied to the int16_t destination (in
 * elements) and to the uint8_t source (in bytes).
 */
#define mc_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
void ff_hevc_put_hevc_##name##width1##_8_##opt1(int16_t *dst, const uint8_t *src, ptrdiff_t _srcstride, \
                                               int height, intptr_t mx, intptr_t my, int width) \
{ \
    ff_hevc_put_hevc_##name##width2##_8_##opt1(dst, src, _srcstride, height, mx, my, width); \
    ff_hevc_put_hevc_##name##width3##_8_##opt2(dst + width2, src + width2, _srcstride, height, mx, my, width); \
}
321 
/*
 * Define the 8-bit ff_hevc_put_hevc_bi_<name><width1>_8_<opt1>() from the
 * <width2>-wide <opt1> routine at column 0 and the <width3>-wide <opt2>
 * routine at column width2.  dst, src and src2 are all advanced by width2
 * (bytes for dst/src, int16_t elements for src2).
 */
#define mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
void ff_hevc_put_hevc_bi_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
                                                  ptrdiff_t _srcstride, const int16_t *src2, \
                                                  int height, intptr_t mx, intptr_t my, int width) \
{ \
    ff_hevc_put_hevc_bi_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \
                                                 src2, height, mx, my, width); \
    ff_hevc_put_hevc_bi_##name##width3##_8_##opt2(dst + width2, dststride, src + width2, _srcstride, \
                                                 src2 + width2, height, mx, my, width); \
}
332 
/*
 * Define the 8-bit ff_hevc_put_hevc_uni_<name><width1>_8_<opt1>() from the
 * <width2>-wide <opt1> routine at column 0 and the <width3>-wide <opt2>
 * routine at column width2.  dst and src are both advanced by width2 bytes.
 */
#define mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
void ff_hevc_put_hevc_uni_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, \
                                                   const uint8_t *src, ptrdiff_t _srcstride, int height, \
                                                   intptr_t mx, intptr_t my, int width) \
{ \
    ff_hevc_put_hevc_uni_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \
                                                  height, mx, my, width); \
    ff_hevc_put_hevc_uni_##name##width3##_8_##opt2(dst + width2, dststride, src + width2, _srcstride, \
                                                  height, mx, my, width); \
}
343 
/* Generate the plain, bi and uni mixed-instruction-set 8-bit wrappers. */
#define mc_rep_mixs_8(name, width1, width2, width3, opt1, opt2) \
    mc_rep_mix_8(name, width1, width2, width3, opt1, opt2)      \
    mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2)   \
    mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2)
348 
#if HAVE_AVX2_EXTERNAL

/*
 * AVX2 wrapper instantiations.  The order is significant: wider wrappers
 * call narrower ones generated earlier in this list (e.g. the 10-bit W=64
 * wrappers use step 32, which is itself generated from step 16).
 */

/* 8 bit, width 48 = 32 (AVX2) + 16 (SSE4) */
mc_rep_mixs_8(pel_pixels, 48, 32, 16, avx2, sse4)
mc_rep_mixs_8(epel_hv,    48, 32, 16, avx2, sse4)
mc_rep_mixs_8(epel_h,     48, 32, 16, avx2, sse4)
mc_rep_mixs_8(epel_v,     48, 32, 16, avx2, sse4)

/* 10 bit, width 24 = 16 (AVX2) + 8 (SSE4); byte offset of the second part is 32 */
mc_rep_mix_10(pel_pixels,    24, 16, 8, avx2, sse4, 32)
mc_bi_rep_mix_10(pel_pixels, 24, 16, 8, avx2, sse4, 32)
mc_rep_mixs_10(epel_hv, 24, 16, 8, avx2, sse4, 32)
mc_rep_mixs_10(epel_h,  24, 16, 8, avx2, sse4, 32)
mc_rep_mixs_10(epel_v,  24, 16, 8, avx2, sse4, 32)


mc_rep_mixs_10(qpel_h,  24, 16, 8, avx2, sse4, 32)
mc_rep_mixs_10(qpel_v,  24, 16, 8, avx2, sse4, 32)
mc_rep_mixs_10(qpel_hv, 24, 16, 8, avx2, sse4, 32)


mc_rep_uni_func(pel_pixels, 8, 64, 128, avx2) /* used for 10bit */
mc_rep_uni_func(pel_pixels, 8, 32,  96, avx2) /* used for 10bit */

mc_rep_funcs(pel_pixels, 8, 32, 64, avx2)

mc_rep_func(pel_pixels, 10, 16, 32, avx2)
mc_rep_func(pel_pixels, 10, 16, 48, avx2)
mc_rep_func(pel_pixels, 10, 32, 64, avx2)

mc_rep_bi_func(pel_pixels, 10, 16, 32, avx2)
mc_rep_bi_func(pel_pixels, 10, 16, 48, avx2)
mc_rep_bi_func(pel_pixels, 10, 32, 64, avx2)

mc_rep_funcs(epel_h, 8, 32, 64, avx2)

mc_rep_funcs(epel_v, 8, 32, 64, avx2)

mc_rep_funcs(epel_h, 10, 16, 32, avx2)
mc_rep_funcs(epel_h, 10, 16, 48, avx2)
mc_rep_funcs(epel_h, 10, 32, 64, avx2)

mc_rep_funcs(epel_v, 10, 16, 32, avx2)
mc_rep_funcs(epel_v, 10, 16, 48, avx2)
mc_rep_funcs(epel_v, 10, 32, 64, avx2)


mc_rep_funcs(epel_hv, 8, 32, 64, avx2)

mc_rep_funcs(epel_hv, 10, 16, 32, avx2)
mc_rep_funcs(epel_hv, 10, 16, 48, avx2)
mc_rep_funcs(epel_hv, 10, 32, 64, avx2)

mc_rep_funcs(qpel_h, 8, 32, 64, avx2)
mc_rep_mixs_8(qpel_h, 48, 32, 16, avx2, sse4)

mc_rep_funcs(qpel_v, 8, 32, 64, avx2)
mc_rep_mixs_8(qpel_v, 48, 32, 16, avx2, sse4)

mc_rep_funcs(qpel_h, 10, 16, 32, avx2)
mc_rep_funcs(qpel_h, 10, 16, 48, avx2)
mc_rep_funcs(qpel_h, 10, 32, 64, avx2)

mc_rep_funcs(qpel_v, 10, 16, 32, avx2)
mc_rep_funcs(qpel_v, 10, 16, 48, avx2)
mc_rep_funcs(qpel_v, 10, 32, 64, avx2)

mc_rep_funcs(qpel_hv, 10, 16, 32, avx2)
mc_rep_funcs(qpel_hv, 10, 16, 48, avx2)
mc_rep_funcs(qpel_hv, 10, 32, 64, avx2)

#endif //AVX2
419 
420 mc_rep_funcs(pel_pixels, 8, 16, 64, sse4)
421 mc_rep_funcs(pel_pixels, 8, 16, 48, sse4)
422 mc_rep_funcs(pel_pixels, 8, 16, 32, sse4)
423 mc_rep_funcs(pel_pixels, 8, 8, 24, sse4)
424 mc_rep_funcs(pel_pixels,10, 8, 64, sse4)
425 mc_rep_funcs(pel_pixels,10, 8, 48, sse4)
426 mc_rep_funcs(pel_pixels,10, 8, 32, sse4)
427 mc_rep_funcs(pel_pixels,10, 8, 24, sse4)
428 mc_rep_funcs(pel_pixels,10, 8, 16, sse4)
429 mc_rep_funcs(pel_pixels,10, 4, 12, sse4)
430 mc_rep_funcs(pel_pixels,12, 8, 64, sse4)
431 mc_rep_funcs(pel_pixels,12, 8, 48, sse4)
432 mc_rep_funcs(pel_pixels,12, 8, 32, sse4)
433 mc_rep_funcs(pel_pixels,12, 8, 24, sse4)
434 mc_rep_funcs(pel_pixels,12, 8, 16, sse4)
435 mc_rep_funcs(pel_pixels,12, 4, 12, sse4)
436 
437 mc_rep_funcs(epel_h, 8, 16, 64, sse4)
438 mc_rep_funcs(epel_h, 8, 16, 48, sse4)
439 mc_rep_funcs(epel_h, 8, 16, 32, sse4)
440 mc_rep_funcs(epel_h, 8, 8, 24, sse4)
441 mc_rep_funcs(epel_h,10, 8, 64, sse4)
442 mc_rep_funcs(epel_h,10, 8, 48, sse4)
443 mc_rep_funcs(epel_h,10, 8, 32, sse4)
444 mc_rep_funcs(epel_h,10, 8, 24, sse4)
445 mc_rep_funcs(epel_h,10, 8, 16, sse4)
446 mc_rep_funcs(epel_h,10, 4, 12, sse4)
447 mc_rep_funcs(epel_h,12, 8, 64, sse4)
448 mc_rep_funcs(epel_h,12, 8, 48, sse4)
449 mc_rep_funcs(epel_h,12, 8, 32, sse4)
450 mc_rep_funcs(epel_h,12, 8, 24, sse4)
451 mc_rep_funcs(epel_h,12, 8, 16, sse4)
452 mc_rep_funcs(epel_h,12, 4, 12, sse4)
453 mc_rep_funcs(epel_v, 8, 16, 64, sse4)
454 mc_rep_funcs(epel_v, 8, 16, 48, sse4)
455 mc_rep_funcs(epel_v, 8, 16, 32, sse4)
456 mc_rep_funcs(epel_v, 8, 8, 24, sse4)
457 mc_rep_funcs(epel_v,10, 8, 64, sse4)
458 mc_rep_funcs(epel_v,10, 8, 48, sse4)
459 mc_rep_funcs(epel_v,10, 8, 32, sse4)
460 mc_rep_funcs(epel_v,10, 8, 24, sse4)
461 mc_rep_funcs(epel_v,10, 8, 16, sse4)
462 mc_rep_funcs(epel_v,10, 4, 12, sse4)
463 mc_rep_funcs(epel_v,12, 8, 64, sse4)
464 mc_rep_funcs(epel_v,12, 8, 48, sse4)
465 mc_rep_funcs(epel_v,12, 8, 32, sse4)
466 mc_rep_funcs(epel_v,12, 8, 24, sse4)
467 mc_rep_funcs(epel_v,12, 8, 16, sse4)
468 mc_rep_funcs(epel_v,12, 4, 12, sse4)
469 mc_rep_funcs(epel_hv, 8, 16, 64, sse4)
470 mc_rep_funcs(epel_hv, 8, 16, 48, sse4)
471 mc_rep_funcs(epel_hv, 8, 16, 32, sse4)
472 mc_rep_funcs(epel_hv, 8, 8, 24, sse4)
473 mc_rep_funcs2(epel_hv,8, 8, 4, 12, sse4)
474 mc_rep_funcs(epel_hv,10, 8, 64, sse4)
475 mc_rep_funcs(epel_hv,10, 8, 48, sse4)
476 mc_rep_funcs(epel_hv,10, 8, 32, sse4)
477 mc_rep_funcs(epel_hv,10, 8, 24, sse4)
478 mc_rep_funcs(epel_hv,10, 8, 16, sse4)
479 mc_rep_funcs(epel_hv,10, 4, 12, sse4)
480 mc_rep_funcs(epel_hv,12, 8, 64, sse4)
481 mc_rep_funcs(epel_hv,12, 8, 48, sse4)
482 mc_rep_funcs(epel_hv,12, 8, 32, sse4)
483 mc_rep_funcs(epel_hv,12, 8, 24, sse4)
484 mc_rep_funcs(epel_hv,12, 8, 16, sse4)
485 mc_rep_funcs(epel_hv,12, 4, 12, sse4)
486 
487 mc_rep_funcs(qpel_h, 8, 16, 64, sse4)
488 mc_rep_funcs(qpel_h, 8, 16, 48, sse4)
489 mc_rep_funcs(qpel_h, 8, 16, 32, sse4)
490 mc_rep_funcs(qpel_h, 8, 8, 24, sse4)
491 mc_rep_funcs(qpel_h,10, 8, 64, sse4)
492 mc_rep_funcs(qpel_h,10, 8, 48, sse4)
493 mc_rep_funcs(qpel_h,10, 8, 32, sse4)
494 mc_rep_funcs(qpel_h,10, 8, 24, sse4)
495 mc_rep_funcs(qpel_h,10, 8, 16, sse4)
496 mc_rep_funcs(qpel_h,10, 4, 12, sse4)
497 mc_rep_funcs(qpel_h,12, 8, 64, sse4)
498 mc_rep_funcs(qpel_h,12, 8, 48, sse4)
499 mc_rep_funcs(qpel_h,12, 8, 32, sse4)
500 mc_rep_funcs(qpel_h,12, 8, 24, sse4)
501 mc_rep_funcs(qpel_h,12, 8, 16, sse4)
502 mc_rep_funcs(qpel_h,12, 4, 12, sse4)
503 mc_rep_funcs(qpel_v, 8, 16, 64, sse4)
504 mc_rep_funcs(qpel_v, 8, 16, 48, sse4)
505 mc_rep_funcs(qpel_v, 8, 16, 32, sse4)
506 mc_rep_funcs(qpel_v, 8, 8, 24, sse4)
507 mc_rep_funcs(qpel_v,10, 8, 64, sse4)
508 mc_rep_funcs(qpel_v,10, 8, 48, sse4)
509 mc_rep_funcs(qpel_v,10, 8, 32, sse4)
510 mc_rep_funcs(qpel_v,10, 8, 24, sse4)
511 mc_rep_funcs(qpel_v,10, 8, 16, sse4)
512 mc_rep_funcs(qpel_v,10, 4, 12, sse4)
513 mc_rep_funcs(qpel_v,12, 8, 64, sse4)
514 mc_rep_funcs(qpel_v,12, 8, 48, sse4)
515 mc_rep_funcs(qpel_v,12, 8, 32, sse4)
516 mc_rep_funcs(qpel_v,12, 8, 24, sse4)
517 mc_rep_funcs(qpel_v,12, 8, 16, sse4)
518 mc_rep_funcs(qpel_v,12, 4, 12, sse4)
519 mc_rep_funcs(qpel_hv, 8, 8, 64, sse4)
520 mc_rep_funcs(qpel_hv, 8, 8, 48, sse4)
521 mc_rep_funcs(qpel_hv, 8, 8, 32, sse4)
522 mc_rep_funcs(qpel_hv, 8, 8, 24, sse4)
523 mc_rep_funcs(qpel_hv, 8, 8, 16, sse4)
524 mc_rep_funcs2(qpel_hv,8, 8, 4, 12, sse4)
525 mc_rep_funcs(qpel_hv,10, 8, 64, sse4)
526 mc_rep_funcs(qpel_hv,10, 8, 48, sse4)
527 mc_rep_funcs(qpel_hv,10, 8, 32, sse4)
528 mc_rep_funcs(qpel_hv,10, 8, 24, sse4)
529 mc_rep_funcs(qpel_hv,10, 8, 16, sse4)
530 mc_rep_funcs(qpel_hv,10, 4, 12, sse4)
531 mc_rep_funcs(qpel_hv,12, 8, 64, sse4)
532 mc_rep_funcs(qpel_hv,12, 8, 48, sse4)
533 mc_rep_funcs(qpel_hv,12, 8, 32, sse4)
534 mc_rep_funcs(qpel_hv,12, 8, 24, sse4)
535 mc_rep_funcs(qpel_hv,12, 8, 16, sse4)
536 mc_rep_funcs(qpel_hv,12, 4, 12, sse4)
537 
/*
 * Define ff_hevc_put_hevc_uni_w<W>_<bitd>_<opt>() by calling the narrower
 * ff_hevc_put_hevc_uni_w<step>_<bitd>_<opt>() once per step-wide column
 * strip (i = 0, step, ... while i < W).
 * The int16_t source advances by i elements, the uint8_t destination by
 * i * ((bitd + 7) / 8) bytes; height, denom, _wx and _ox are forwarded
 * unchanged.
 */
#define mc_rep_uni_w(bitd, step, W, opt) \
void ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const int16_t *_src, \
                                               int height, int denom, int _wx, int _ox) \
{ \
    int i; \
    for (i = 0; i < W; i += step) { \
        const int16_t *src = _src + i; \
        uint8_t       *dst = _dst + (i * ((bitd + 7) / 8)); \
        ff_hevc_put_hevc_uni_w##step##_##bitd##_##opt(dst, dststride, src, \
                                                     height, denom, _wx, _ox); \
    } \
}

/* W = 12 is tiled as 2 x 6; every wider block is tiled in strips of 8. */
mc_rep_uni_w(8, 6, 12, sse4)
mc_rep_uni_w(8, 8, 16, sse4)
mc_rep_uni_w(8, 8, 24, sse4)
mc_rep_uni_w(8, 8, 32, sse4)
mc_rep_uni_w(8, 8, 48, sse4)
mc_rep_uni_w(8, 8, 64, sse4)

mc_rep_uni_w(10, 6, 12, sse4)
mc_rep_uni_w(10, 8, 16, sse4)
mc_rep_uni_w(10, 8, 24, sse4)
mc_rep_uni_w(10, 8, 32, sse4)
mc_rep_uni_w(10, 8, 48, sse4)
mc_rep_uni_w(10, 8, 64, sse4)

mc_rep_uni_w(12, 6, 12, sse4)
mc_rep_uni_w(12, 8, 16, sse4)
mc_rep_uni_w(12, 8, 24, sse4)
mc_rep_uni_w(12, 8, 32, sse4)
mc_rep_uni_w(12, 8, 48, sse4)
mc_rep_uni_w(12, 8, 64, sse4)
572 
/*
 * Define ff_hevc_put_hevc_bi_w<W>_<bitd>_<opt>() by calling the narrower
 * ff_hevc_put_hevc_bi_w<step>_<bitd>_<opt>() once per step-wide column
 * strip (i = 0, step, ... while i < W).
 * Both int16_t sources advance by i elements, the uint8_t destination by
 * i * ((bitd + 7) / 8) bytes; the weights, offsets, denom and height are
 * forwarded unchanged.
 */
#define mc_rep_bi_w(bitd, step, W, opt) \
void ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const int16_t *_src, \
                                              const int16_t *_src2, int height, \
                                              int denom, int _wx0, int _wx1, int _ox0, int _ox1) \
{ \
    int i; \
    for (i = 0; i < W; i += step) { \
        const int16_t *src  = _src  + i; \
        const int16_t *src2 = _src2 + i; \
        uint8_t       *dst  = _dst  + (i * ((bitd + 7) / 8)); \
        ff_hevc_put_hevc_bi_w##step##_##bitd##_##opt(dst, dststride, src, src2, \
                                                    height, denom, _wx0, _wx1, _ox0, _ox1); \
    } \
}

/* W = 12 is tiled as 2 x 6; every wider block is tiled in strips of 8. */
mc_rep_bi_w(8, 6, 12, sse4)
mc_rep_bi_w(8, 8, 16, sse4)
mc_rep_bi_w(8, 8, 24, sse4)
mc_rep_bi_w(8, 8, 32, sse4)
mc_rep_bi_w(8, 8, 48, sse4)
mc_rep_bi_w(8, 8, 64, sse4)

mc_rep_bi_w(10, 6, 12, sse4)
mc_rep_bi_w(10, 8, 16, sse4)
mc_rep_bi_w(10, 8, 24, sse4)
mc_rep_bi_w(10, 8, 32, sse4)
mc_rep_bi_w(10, 8, 48, sse4)
mc_rep_bi_w(10, 8, 64, sse4)

mc_rep_bi_w(12, 6, 12, sse4)
mc_rep_bi_w(12, 8, 16, sse4)
mc_rep_bi_w(12, 8, 24, sse4)
mc_rep_bi_w(12, 8, 32, sse4)
mc_rep_bi_w(12, 8, 48, sse4)
mc_rep_bi_w(12, 8, 64, sse4)
609 
/*
 * Define ff_hevc_put_hevc_uni_w_<name><W>_<bitd>_<opt>() as a two-stage
 * pipeline:
 *   1. ff_hevc_put_hevc_<name><W>_<bitd>_<opt>() writes its int16_t result
 *      into a local scratch buffer of 71 * MAX_PB_SIZE elements (declared
 *      with LOCAL_ALIGNED_16);
 *   2. ff_hevc_put_hevc_uni_w<W>_<bitd>_<opt>() reads that buffer and
 *      writes to _dst using denom, _wx and _ox.
 */
#define mc_uni_w_func(name, bitd, W, opt) \
void ff_hevc_put_hevc_uni_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \
                                                      const uint8_t *_src, ptrdiff_t _srcstride, \
                                                      int height, int denom, \
                                                      int _wx, int _ox, \
                                                      intptr_t mx, intptr_t my, int width) \
{ \
    LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \
    ff_hevc_put_hevc_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \
    ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(_dst, _dststride, temp, height, denom, _wx, _ox); \
}
621 
/*
 * Instantiate mc_uni_w_func for the widths 4, 8, 12, 16, 24, 32, 48 and 64.
 * Width 6 is not part of this set; callers that need it instantiate
 * mc_uni_w_func(name, bitd, 6, opt) separately.
 */
#define mc_uni_w_funcs(name, bitd, opt)  \
    mc_uni_w_func(name, bitd, 4, opt)    \
    mc_uni_w_func(name, bitd, 8, opt)    \
    mc_uni_w_func(name, bitd, 12, opt)   \
    mc_uni_w_func(name, bitd, 16, opt)   \
    mc_uni_w_func(name, bitd, 24, opt)   \
    mc_uni_w_func(name, bitd, 32, opt)   \
    mc_uni_w_func(name, bitd, 48, opt)   \
    mc_uni_w_func(name, bitd, 64, opt)
631 
632 mc_uni_w_funcs(pel_pixels, 8, sse4)
633 mc_uni_w_func(pel_pixels, 8, 6, sse4)
634 mc_uni_w_funcs(epel_h, 8, sse4)
635 mc_uni_w_func(epel_h, 8, 6, sse4)
636 mc_uni_w_funcs(epel_v, 8, sse4)
637 mc_uni_w_func(epel_v, 8, 6, sse4)
638 mc_uni_w_funcs(epel_hv, 8, sse4)
639 mc_uni_w_func(epel_hv, 8, 6, sse4)
640 mc_uni_w_funcs(qpel_h, 8, sse4)
641 mc_uni_w_funcs(qpel_v, 8, sse4)
642 mc_uni_w_funcs(qpel_hv, 8, sse4)
643 
644 mc_uni_w_funcs(pel_pixels, 10, sse4)
645 mc_uni_w_func(pel_pixels, 10, 6, sse4)
646 mc_uni_w_funcs(epel_h, 10, sse4)
647 mc_uni_w_func(epel_h, 10, 6, sse4)
648 mc_uni_w_funcs(epel_v, 10, sse4)
649 mc_uni_w_func(epel_v, 10, 6, sse4)
650 mc_uni_w_funcs(epel_hv, 10, sse4)
651 mc_uni_w_func(epel_hv, 10, 6, sse4)
652 mc_uni_w_funcs(qpel_h, 10, sse4)
653 mc_uni_w_funcs(qpel_v, 10, sse4)
654 mc_uni_w_funcs(qpel_hv, 10, sse4)
655 
656 mc_uni_w_funcs(pel_pixels, 12, sse4)
657 mc_uni_w_func(pel_pixels, 12, 6, sse4)
658 mc_uni_w_funcs(epel_h, 12, sse4)
659 mc_uni_w_func(epel_h, 12, 6, sse4)
660 mc_uni_w_funcs(epel_v, 12, sse4)
661 mc_uni_w_func(epel_v, 12, 6, sse4)
662 mc_uni_w_funcs(epel_hv, 12, sse4)
663 mc_uni_w_func(epel_hv, 12, 6, sse4)
664 mc_uni_w_funcs(qpel_h, 12, sse4)
665 mc_uni_w_funcs(qpel_v, 12, sse4)
666 mc_uni_w_funcs(qpel_hv, 12, sse4)
667 
/*
 * Define ff_hevc_put_hevc_bi_w_<name><W>_<bitd>_<opt>() as a two-stage
 * pipeline:
 *   1. ff_hevc_put_hevc_<name><W>_<bitd>_<opt>() writes its int16_t result
 *      into a local scratch buffer of 71 * MAX_PB_SIZE elements (declared
 *      with LOCAL_ALIGNED_16);
 *   2. ff_hevc_put_hevc_bi_w<W>_<bitd>_<opt>() combines that buffer with
 *      _src2 and writes to _dst using denom and the two weight/offset pairs.
 */
#define mc_bi_w_func(name, bitd, W, opt) \
void ff_hevc_put_hevc_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \
                                                     const uint8_t *_src, ptrdiff_t _srcstride, \
                                                     const int16_t *_src2, \
                                                     int height, int denom, \
                                                     int _wx0, int _wx1, int _ox0, int _ox1, \
                                                     intptr_t mx, intptr_t my, int width) \
{ \
    LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \
    ff_hevc_put_hevc_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \
    ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(_dst, _dststride, temp, _src2, \
                                             height, denom, _wx0, _wx1, _ox0, _ox1); \
}
681 
/*
 * Instantiate mc_bi_w_func for the widths 4, 8, 12, 16, 24, 32, 48 and 64.
 * Width 6 is not part of this set; callers that need it instantiate
 * mc_bi_w_func(name, bitd, 6, opt) separately.
 */
#define mc_bi_w_funcs(name, bitd, opt)  \
    mc_bi_w_func(name, bitd, 4, opt)    \
    mc_bi_w_func(name, bitd, 8, opt)    \
    mc_bi_w_func(name, bitd, 12, opt)   \
    mc_bi_w_func(name, bitd, 16, opt)   \
    mc_bi_w_func(name, bitd, 24, opt)   \
    mc_bi_w_func(name, bitd, 32, opt)   \
    mc_bi_w_func(name, bitd, 48, opt)   \
    mc_bi_w_func(name, bitd, 64, opt)
691 
692 mc_bi_w_funcs(pel_pixels, 8, sse4)
693 mc_bi_w_func(pel_pixels, 8, 6, sse4)
694 mc_bi_w_funcs(epel_h, 8, sse4)
695 mc_bi_w_func(epel_h, 8, 6, sse4)
696 mc_bi_w_funcs(epel_v, 8, sse4)
697 mc_bi_w_func(epel_v, 8, 6, sse4)
698 mc_bi_w_funcs(epel_hv, 8, sse4)
699 mc_bi_w_func(epel_hv, 8, 6, sse4)
700 mc_bi_w_funcs(qpel_h, 8, sse4)
701 mc_bi_w_funcs(qpel_v, 8, sse4)
702 mc_bi_w_funcs(qpel_hv, 8, sse4)
703 
704 mc_bi_w_funcs(pel_pixels, 10, sse4)
705 mc_bi_w_func(pel_pixels, 10, 6, sse4)
706 mc_bi_w_funcs(epel_h, 10, sse4)
707 mc_bi_w_func(epel_h, 10, 6, sse4)
708 mc_bi_w_funcs(epel_v, 10, sse4)
709 mc_bi_w_func(epel_v, 10, 6, sse4)
710 mc_bi_w_funcs(epel_hv, 10, sse4)
711 mc_bi_w_func(epel_hv, 10, 6, sse4)
712 mc_bi_w_funcs(qpel_h, 10, sse4)
713 mc_bi_w_funcs(qpel_v, 10, sse4)
714 mc_bi_w_funcs(qpel_hv, 10, sse4)
715 
716 mc_bi_w_funcs(pel_pixels, 12, sse4)
717 mc_bi_w_func(pel_pixels, 12, 6, sse4)
718 mc_bi_w_funcs(epel_h, 12, sse4)
719 mc_bi_w_func(epel_h, 12, 6, sse4)
720 mc_bi_w_funcs(epel_v, 12, sse4)
721 mc_bi_w_func(epel_v, 12, 6, sse4)
722 mc_bi_w_funcs(epel_hv, 12, sse4)
723 mc_bi_w_func(epel_hv, 12, 6, sse4)
724 mc_bi_w_funcs(qpel_h, 12, sse4)
725 mc_bi_w_funcs(qpel_v, 12, sse4)
726 mc_bi_w_funcs(qpel_hv, 12, sse4)
727 #endif //ARCH_X86_64 && HAVE_SSE4_EXTERNAL
728 
/*
 * Declare the five SAO band-filter routines for one bit depth and one
 * instruction set: ff_hevc_sao_band_filter_{8,16,32,48,64}_<bitd>_<opt>().
 * All five share the same signature.
 */
#define SAO_BAND_FILTER_FUNCS(bitd, opt) \
void ff_hevc_sao_band_filter_8_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
                                              const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
void ff_hevc_sao_band_filter_16_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
                                               const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
void ff_hevc_sao_band_filter_32_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
                                               const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
void ff_hevc_sao_band_filter_48_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
                                               const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
void ff_hevc_sao_band_filter_64_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
                                               const int16_t *sao_offset_val, int sao_left_class, int width, int height);

SAO_BAND_FILTER_FUNCS(8,  sse2)
SAO_BAND_FILTER_FUNCS(10, sse2)
SAO_BAND_FILTER_FUNCS(12, sse2)
SAO_BAND_FILTER_FUNCS(8,  avx)
SAO_BAND_FILTER_FUNCS(10, avx)
SAO_BAND_FILTER_FUNCS(12, avx)
SAO_BAND_FILTER_FUNCS(8,  avx2)
SAO_BAND_FILTER_FUNCS(10, avx2)
SAO_BAND_FILTER_FUNCS(12, avx2)
750 
/*
 * Fill c->sao_band_filter[0..4] with the routines named 8, 16, 32, 48 and
 * 64 for the given bit depth and instruction set.  Expects a pointer named
 * c in scope; wrapped in do { } while (0) so it behaves as one statement.
 */
#define SAO_BAND_INIT(bitd, opt) do {                                       \
    c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_##bitd##_##opt;       \
    c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_##bitd##_##opt;      \
    c->sao_band_filter[2] = ff_hevc_sao_band_filter_32_##bitd##_##opt;      \
    c->sao_band_filter[3] = ff_hevc_sao_band_filter_48_##bitd##_##opt;      \
    c->sao_band_filter[4] = ff_hevc_sao_band_filter_64_##bitd##_##opt;      \
} while (0)
758 
/*
 * Declare the five SAO edge-filter routines for one bit depth and one
 * instruction set: ff_hevc_sao_edge_filter_{8,16,32,48,64}_<bitd>_<opt>().
 * All five share the same signature.  The last prototype ends the macro:
 * there is no trailing line continuation, so the definition does not depend
 * on a blank line following it.
 */
#define SAO_EDGE_FILTER_FUNCS(bitd, opt) \
void ff_hevc_sao_edge_filter_8_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
                                              const int16_t *sao_offset_val, int eo, int width, int height); \
void ff_hevc_sao_edge_filter_16_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
                                               const int16_t *sao_offset_val, int eo, int width, int height); \
void ff_hevc_sao_edge_filter_32_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
                                               const int16_t *sao_offset_val, int eo, int width, int height); \
void ff_hevc_sao_edge_filter_48_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
                                               const int16_t *sao_offset_val, int eo, int width, int height); \
void ff_hevc_sao_edge_filter_64_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
                                               const int16_t *sao_offset_val, int eo, int width, int height);

/* 8 bit uses SSSE3 as its non-AVX2 variant, 10/12 bit use SSE2. */
SAO_EDGE_FILTER_FUNCS(8,  ssse3)
SAO_EDGE_FILTER_FUNCS(8,  avx2)
SAO_EDGE_FILTER_FUNCS(10, sse2)
SAO_EDGE_FILTER_FUNCS(10, avx2)
SAO_EDGE_FILTER_FUNCS(12, sse2)
SAO_EDGE_FILTER_FUNCS(12, avx2)
777 
/*
 * Fill c->sao_edge_filter[0..4] with the routines named 8, 16, 32, 48 and
 * 64 for the given bit depth and instruction set.  Expects a pointer named
 * c in scope; wrapped in do { } while (0) so it behaves as one statement.
 */
#define SAO_EDGE_INIT(bitd, opt) do {                                       \
    c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_8_##bitd##_##opt;       \
    c->sao_edge_filter[1] = ff_hevc_sao_edge_filter_16_##bitd##_##opt;      \
    c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_##bitd##_##opt;      \
    c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_##bitd##_##opt;      \
    c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_##bitd##_##opt;      \
} while (0)
785 
/*
 * Invoke PEL_LINK once per block width for an epel-style table.
 * Index -> width suffix: 1->4, 2->6, 3->8, 4->12, 5->16, 6->24, 7->32,
 * 8->48, 9->64.  Index 0 is not touched.  The final PEL_LINK has no
 * trailing semicolon, so the invocation must supply it.
 */
#define EPEL_LINKS(pointer, my, mx, fname, bitd, opt)        \
    PEL_LINK(pointer, 1, my, mx, fname##4,  bitd, opt);      \
    PEL_LINK(pointer, 2, my, mx, fname##6,  bitd, opt);      \
    PEL_LINK(pointer, 3, my, mx, fname##8,  bitd, opt);      \
    PEL_LINK(pointer, 4, my, mx, fname##12, bitd, opt);      \
    PEL_LINK(pointer, 5, my, mx, fname##16, bitd, opt);      \
    PEL_LINK(pointer, 6, my, mx, fname##24, bitd, opt);      \
    PEL_LINK(pointer, 7, my, mx, fname##32, bitd, opt);      \
    PEL_LINK(pointer, 8, my, mx, fname##48, bitd, opt);      \
    PEL_LINK(pointer, 9, my, mx, fname##64, bitd, opt)
/*
 * QPEL counterpart of the per-width linker: slots 1 and 3..9 take the
 * kernels whose names end in 4, 8, 12, 16, 24, 32, 48 and 64. Slot 2 is
 * not assigned here.
 * The expansion deliberately ends without a semicolon; the caller adds it.
 */
#define QPEL_LINKS(tab, v_idx, h_idx, base, depth, isa)     \
    PEL_LINK(tab, 1, v_idx, h_idx, base##4,  depth, isa);   \
    PEL_LINK(tab, 3, v_idx, h_idx, base##8,  depth, isa);   \
    PEL_LINK(tab, 4, v_idx, h_idx, base##12, depth, isa);   \
    PEL_LINK(tab, 5, v_idx, h_idx, base##16, depth, isa);   \
    PEL_LINK(tab, 6, v_idx, h_idx, base##24, depth, isa);   \
    PEL_LINK(tab, 7, v_idx, h_idx, base##32, depth, isa);   \
    PEL_LINK(tab, 8, v_idx, h_idx, base##48, depth, isa);   \
    PEL_LINK(tab, 9, v_idx, h_idx, base##64, depth, isa)
805 
807 {
808  int cpu_flags = av_get_cpu_flags();
809 
810  if (bit_depth == 8) {
811  if (EXTERNAL_MMXEXT(cpu_flags)) {
812  c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_mmxext;
813 
814  c->add_residual[0] = ff_hevc_add_residual_4_8_mmxext;
815  }
816  if (EXTERNAL_SSE2(cpu_flags)) {
817  c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
818  c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
819  if (ARCH_X86_64) {
820  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2;
821  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2;
822 
823  c->idct[2] = ff_hevc_idct_16x16_8_sse2;
824  c->idct[3] = ff_hevc_idct_32x32_8_sse2;
825  }
826  SAO_BAND_INIT(8, sse2);
827 
828  c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_sse2;
829  c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_sse2;
830  c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_sse2;
831 
832  c->idct[0] = ff_hevc_idct_4x4_8_sse2;
833  c->idct[1] = ff_hevc_idct_8x8_8_sse2;
834 
835  c->add_residual[1] = ff_hevc_add_residual_8_8_sse2;
836  c->add_residual[2] = ff_hevc_add_residual_16_8_sse2;
837  c->add_residual[3] = ff_hevc_add_residual_32_8_sse2;
838  }
839  if (EXTERNAL_SSSE3(cpu_flags)) {
840  if(ARCH_X86_64) {
841  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
842  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
843  }
844  SAO_EDGE_INIT(8, ssse3);
845  }
846  if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
847 
848  EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 8, sse4);
849  EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 8, sse4);
850  EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 8, sse4);
851  EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 8, sse4);
852 
853  QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 8, sse4);
854  QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 8, sse4);
855  QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 8, sse4);
856  QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 8, sse4);
857  }
858  if (EXTERNAL_AVX(cpu_flags)) {
859  c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_avx;
860  c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_avx;
861  if (ARCH_X86_64) {
862  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx;
863  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx;
864 
865  c->idct[2] = ff_hevc_idct_16x16_8_avx;
866  c->idct[3] = ff_hevc_idct_32x32_8_avx;
867  }
868  SAO_BAND_INIT(8, avx);
869 
870  c->idct[0] = ff_hevc_idct_4x4_8_avx;
871  c->idct[1] = ff_hevc_idct_8x8_8_avx;
872 
873  c->add_residual[1] = ff_hevc_add_residual_8_8_avx;
874  c->add_residual[2] = ff_hevc_add_residual_16_8_avx;
875  c->add_residual[3] = ff_hevc_add_residual_32_8_avx;
876  }
877  if (EXTERNAL_AVX2(cpu_flags)) {
878  c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_8_avx2;
879  c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_8_avx2;
880  }
882  c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_avx2;
883  c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_avx2;
884  if (ARCH_X86_64) {
885  c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2;
886  c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2;
887  c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2;
888 
889  c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2;
890  c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2;
891  c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2;
892 
893  c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2;
894  c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2;
895  c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2;
896 
897  c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2;
898  c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2;
899  c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2;
900 
901  c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2;
902  c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2;
903  c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2;
904 
905  c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2;
906  c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2;
907  c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2;
908 
909  c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_8_avx2;
910  c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_8_avx2;
911  c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_8_avx2;
912 
913  c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_8_avx2;
914  c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_8_avx2;
915  c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_8_avx2;
916 
917  c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_8_avx2;
918  c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_8_avx2;
919  c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_8_avx2;
920 
921  c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_8_avx2;
922  c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_8_avx2;
923  c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_8_avx2;
924 
925  c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_8_avx2;
926  c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_8_avx2;
927  c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_8_avx2;
928 
929  c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_8_avx2;
930  c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_8_avx2;
931  c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_8_avx2;
932 
933  c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_8_avx2;
934  c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_8_avx2;
935  c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_8_avx2;
936 
937  c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_8_avx2;
938  c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_8_avx2;
939  c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_8_avx2;
940 
941  c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_8_avx2;
942  c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_8_avx2;
943  c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_8_avx2;
944 
945  c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_8_avx2;
946  c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_8_avx2;
947  c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_8_avx2;
948 
949  c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_8_avx2;
950  c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_8_avx2;
951  c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_8_avx2;
952 
953  c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_8_avx2;
954  c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_8_avx2;
955  c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_8_avx2;
956 
957  c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_8_avx2;
958  c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_8_avx2;
959  c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_8_avx2;
960 
961  c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_8_avx2;
962  c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_8_avx2;
963  c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_8_avx2;
964 
965  c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_8_avx2;
966  c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_8_avx2;
967  c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_8_avx2;
968  }
969  SAO_BAND_INIT(8, avx2);
970 
971  c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_8_avx2;
972  c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_8_avx2;
973  c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_8_avx2;
974 
975  c->add_residual[3] = ff_hevc_add_residual_32_8_avx2;
976  }
977  if (EXTERNAL_AVX512ICL(cpu_flags) && ARCH_X86_64) {
978  c->put_hevc_qpel[1][0][1] = ff_hevc_put_hevc_qpel_h4_8_avx512icl;
979  c->put_hevc_qpel[3][0][1] = ff_hevc_put_hevc_qpel_h8_8_avx512icl;
980  c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_8_avx512icl;
981  c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_8_avx512icl;
982  c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_8_avx512icl;
983  c->put_hevc_qpel[3][1][1] = ff_hevc_put_hevc_qpel_hv8_8_avx512icl;
984  }
985  } else if (bit_depth == 10) {
986  if (EXTERNAL_MMXEXT(cpu_flags)) {
987  c->add_residual[0] = ff_hevc_add_residual_4_10_mmxext;
988  c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_mmxext;
989  }
990  if (EXTERNAL_SSE2(cpu_flags)) {
991  c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
992  c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;
993  if (ARCH_X86_64) {
994  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2;
995  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2;
996 
997  c->idct[2] = ff_hevc_idct_16x16_10_sse2;
998  c->idct[3] = ff_hevc_idct_32x32_10_sse2;
999  }
1000  SAO_BAND_INIT(10, sse2);
1001  SAO_EDGE_INIT(10, sse2);
1002 
1003  c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_sse2;
1004  c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_sse2;
1005  c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_sse2;
1006 
1007  c->idct[0] = ff_hevc_idct_4x4_10_sse2;
1008  c->idct[1] = ff_hevc_idct_8x8_10_sse2;
1009 
1010  c->add_residual[1] = ff_hevc_add_residual_8_10_sse2;
1011  c->add_residual[2] = ff_hevc_add_residual_16_10_sse2;
1012  c->add_residual[3] = ff_hevc_add_residual_32_10_sse2;
1013  }
1014  if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
1015  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
1016  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
1017  }
1018  if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
1019  EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4);
1020  EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 10, sse4);
1021  EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 10, sse4);
1022  EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 10, sse4);
1023 
1024  QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 10, sse4);
1025  QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 10, sse4);
1026  QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 10, sse4);
1027  QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 10, sse4);
1028  }
1029  if (EXTERNAL_AVX(cpu_flags)) {
1030  c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_avx;
1031  c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_avx;
1032  if (ARCH_X86_64) {
1033  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx;
1034  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx;
1035 
1036  c->idct[2] = ff_hevc_idct_16x16_10_avx;
1037  c->idct[3] = ff_hevc_idct_32x32_10_avx;
1038  }
1039 
1040  c->idct[0] = ff_hevc_idct_4x4_10_avx;
1041  c->idct[1] = ff_hevc_idct_8x8_10_avx;
1042 
1043  SAO_BAND_INIT(10, avx);
1044  }
1045  if (EXTERNAL_AVX2(cpu_flags)) {
1046  c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_10_avx2;
1047  }
1049  c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_avx2;
1050  c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_avx2;
1051  if (ARCH_X86_64) {
1052  c->put_hevc_epel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2;
1053  c->put_hevc_epel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2;
1054  c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2;
1055  c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2;
1056  c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2;
1057 
1058  c->put_hevc_qpel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2;
1059  c->put_hevc_qpel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2;
1060  c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2;
1061  c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2;
1062  c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2;
1063 
1064  c->put_hevc_epel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2;
1065  c->put_hevc_epel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2;
1066  c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2;
1067  c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2;
1068  c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2;
1069 
1070  c->put_hevc_qpel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2;
1071  c->put_hevc_qpel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2;
1072  c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2;
1073  c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2;
1074  c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2;
1075 
1076  c->put_hevc_epel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2;
1077  c->put_hevc_epel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2;
1078  c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2;
1079  c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2;
1080  c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2;
1081  c->put_hevc_qpel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2;
1082  c->put_hevc_qpel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2;
1083  c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2;
1084  c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2;
1085  c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2;
1086 
1087  c->put_hevc_epel[5][0][1] = ff_hevc_put_hevc_epel_h16_10_avx2;
1088  c->put_hevc_epel[6][0][1] = ff_hevc_put_hevc_epel_h24_10_avx2;
1089  c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_10_avx2;
1090  c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_10_avx2;
1091  c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_10_avx2;
1092 
1093  c->put_hevc_epel_uni[5][0][1] = ff_hevc_put_hevc_uni_epel_h16_10_avx2;
1094  c->put_hevc_epel_uni[6][0][1] = ff_hevc_put_hevc_uni_epel_h24_10_avx2;
1095  c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_10_avx2;
1096  c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_10_avx2;
1097  c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_10_avx2;
1098 
1099  c->put_hevc_epel_bi[5][0][1] = ff_hevc_put_hevc_bi_epel_h16_10_avx2;
1100  c->put_hevc_epel_bi[6][0][1] = ff_hevc_put_hevc_bi_epel_h24_10_avx2;
1101  c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_10_avx2;
1102  c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_10_avx2;
1103  c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_10_avx2;
1104 
1105  c->put_hevc_epel[5][1][0] = ff_hevc_put_hevc_epel_v16_10_avx2;
1106  c->put_hevc_epel[6][1][0] = ff_hevc_put_hevc_epel_v24_10_avx2;
1107  c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_10_avx2;
1108  c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_10_avx2;
1109  c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_10_avx2;
1110 
1111  c->put_hevc_epel_uni[5][1][0] = ff_hevc_put_hevc_uni_epel_v16_10_avx2;
1112  c->put_hevc_epel_uni[6][1][0] = ff_hevc_put_hevc_uni_epel_v24_10_avx2;
1113  c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_10_avx2;
1114  c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_10_avx2;
1115  c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_10_avx2;
1116 
1117  c->put_hevc_epel_bi[5][1][0] = ff_hevc_put_hevc_bi_epel_v16_10_avx2;
1118  c->put_hevc_epel_bi[6][1][0] = ff_hevc_put_hevc_bi_epel_v24_10_avx2;
1119  c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_10_avx2;
1120  c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_10_avx2;
1121  c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_10_avx2;
1122 
1123  c->put_hevc_epel[5][1][1] = ff_hevc_put_hevc_epel_hv16_10_avx2;
1124  c->put_hevc_epel[6][1][1] = ff_hevc_put_hevc_epel_hv24_10_avx2;
1125  c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_10_avx2;
1126  c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_10_avx2;
1127  c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_10_avx2;
1128 
1129  c->put_hevc_epel_uni[5][1][1] = ff_hevc_put_hevc_uni_epel_hv16_10_avx2;
1130  c->put_hevc_epel_uni[6][1][1] = ff_hevc_put_hevc_uni_epel_hv24_10_avx2;
1131  c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_10_avx2;
1132  c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_10_avx2;
1133  c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_10_avx2;
1134 
1135  c->put_hevc_epel_bi[5][1][1] = ff_hevc_put_hevc_bi_epel_hv16_10_avx2;
1136  c->put_hevc_epel_bi[6][1][1] = ff_hevc_put_hevc_bi_epel_hv24_10_avx2;
1137  c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_10_avx2;
1138  c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_10_avx2;
1139  c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_10_avx2;
1140 
1141  c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_10_avx2;
1142  c->put_hevc_qpel[6][0][1] = ff_hevc_put_hevc_qpel_h24_10_avx2;
1143  c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_10_avx2;
1144  c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_10_avx2;
1145  c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_10_avx2;
1146 
1147  c->put_hevc_qpel_uni[5][0][1] = ff_hevc_put_hevc_uni_qpel_h16_10_avx2;
1148  c->put_hevc_qpel_uni[6][0][1] = ff_hevc_put_hevc_uni_qpel_h24_10_avx2;
1149  c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_10_avx2;
1150  c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_10_avx2;
1151  c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_10_avx2;
1152 
1153  c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_hevc_bi_qpel_h16_10_avx2;
1154  c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_hevc_bi_qpel_h24_10_avx2;
1155  c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_10_avx2;
1156  c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_10_avx2;
1157  c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_10_avx2;
1158 
1159  c->put_hevc_qpel[5][1][0] = ff_hevc_put_hevc_qpel_v16_10_avx2;
1160  c->put_hevc_qpel[6][1][0] = ff_hevc_put_hevc_qpel_v24_10_avx2;
1161  c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_10_avx2;
1162  c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_10_avx2;
1163  c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_10_avx2;
1164 
1165  c->put_hevc_qpel_uni[5][1][0] = ff_hevc_put_hevc_uni_qpel_v16_10_avx2;
1166  c->put_hevc_qpel_uni[6][1][0] = ff_hevc_put_hevc_uni_qpel_v24_10_avx2;
1167  c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_10_avx2;
1168  c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_10_avx2;
1169  c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_10_avx2;
1170 
1171  c->put_hevc_qpel_bi[5][1][0] = ff_hevc_put_hevc_bi_qpel_v16_10_avx2;
1172  c->put_hevc_qpel_bi[6][1][0] = ff_hevc_put_hevc_bi_qpel_v24_10_avx2;
1173  c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_10_avx2;
1174  c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_10_avx2;
1175  c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_10_avx2;
1176 
1177  c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_10_avx2;
1178  c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_10_avx2;
1179  c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_10_avx2;
1180  c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_10_avx2;
1181  c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_10_avx2;
1182 
1183  c->put_hevc_qpel_uni[5][1][1] = ff_hevc_put_hevc_uni_qpel_hv16_10_avx2;
1184  c->put_hevc_qpel_uni[6][1][1] = ff_hevc_put_hevc_uni_qpel_hv24_10_avx2;
1185  c->put_hevc_qpel_uni[7][1][1] = ff_hevc_put_hevc_uni_qpel_hv32_10_avx2;
1186  c->put_hevc_qpel_uni[8][1][1] = ff_hevc_put_hevc_uni_qpel_hv48_10_avx2;
1187  c->put_hevc_qpel_uni[9][1][1] = ff_hevc_put_hevc_uni_qpel_hv64_10_avx2;
1188 
1189  c->put_hevc_qpel_bi[5][1][1] = ff_hevc_put_hevc_bi_qpel_hv16_10_avx2;
1190  c->put_hevc_qpel_bi[6][1][1] = ff_hevc_put_hevc_bi_qpel_hv24_10_avx2;
1191  c->put_hevc_qpel_bi[7][1][1] = ff_hevc_put_hevc_bi_qpel_hv32_10_avx2;
1192  c->put_hevc_qpel_bi[8][1][1] = ff_hevc_put_hevc_bi_qpel_hv48_10_avx2;
1193  c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_hevc_bi_qpel_hv64_10_avx2;
1194  }
1195  SAO_BAND_INIT(10, avx2);
1196  SAO_EDGE_INIT(10, avx2);
1197 
1198  c->add_residual[2] = ff_hevc_add_residual_16_10_avx2;
1199  c->add_residual[3] = ff_hevc_add_residual_32_10_avx2;
1200  }
1201  } else if (bit_depth == 12) {
1202  if (EXTERNAL_MMXEXT(cpu_flags)) {
1203  c->idct_dc[0] = ff_hevc_idct_4x4_dc_12_mmxext;
1204  }
1205  if (EXTERNAL_SSE2(cpu_flags)) {
1206  c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2;
1207  c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2;
1208  if (ARCH_X86_64) {
1209  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2;
1210  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2;
1211  }
1212  SAO_BAND_INIT(12, sse2);
1213  SAO_EDGE_INIT(12, sse2);
1214 
1215  c->idct_dc[1] = ff_hevc_idct_8x8_dc_12_sse2;
1216  c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_sse2;
1217  c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_sse2;
1218  }
1219  if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
1220  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3;
1221  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3;
1222  }
1223  if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
1224  EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 12, sse4);
1225  EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 12, sse4);
1226  EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 12, sse4);
1227  EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 12, sse4);
1228 
1229  QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 12, sse4);
1230  QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 12, sse4);
1231  QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 12, sse4);
1232  QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 12, sse4);
1233  }
1234  if (EXTERNAL_AVX(cpu_flags)) {
1235  c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_avx;
1236  c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_avx;
1237  if (ARCH_X86_64) {
1238  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_avx;
1239  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_avx;
1240  }
1241  SAO_BAND_INIT(12, avx);
1242  }
1243  if (EXTERNAL_AVX2(cpu_flags)) {
1244  c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_12_avx2;
1245  }
1247  c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_avx2;
1248  c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_avx2;
1249 
1250  SAO_BAND_INIT(12, avx2);
1251  SAO_EDGE_INIT(12, avx2);
1252  }
1253  }
1254 }
SAO_BAND_FILTER_FUNCS
#define SAO_BAND_FILTER_FUNCS(bitd, opt)
Definition: dsp_init.c:729
mc_rep_bi_func
#define mc_rep_bi_func(name, bitd, step, W, opt)
Definition: dsp_init.c:219
cpu.h
ff_hevc_add_residual_16_8_avx
void ff_hevc_add_residual_16_8_avx(uint8_t *dst, const int16_t *res, ptrdiff_t stride)
mem_internal.h
ff_hevc_put_hevc_pel_pixels48_10_avx2
void ff_hevc_put_hevc_pel_pixels48_10_avx2(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_dsp_init_x86
void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
Definition: dsp_init.c:806
ff_hevc_put_hevc_pel_pixels16_10_avx2
void ff_hevc_put_hevc_pel_pixels16_10_avx2(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
EXTERNAL_AVX2_FAST
#define EXTERNAL_AVX2_FAST(flags)
Definition: cpu.h:79
ff_hevc_put_hevc_bi_pel_pixels48_10_avx2
void ff_hevc_put_hevc_bi_pel_pixels48_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_put_hevc_qpel_h16_8_avx512icl
void ff_hevc_put_hevc_qpel_h16_8_avx512icl(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
mc_rep_funcs2
#define mc_rep_funcs2(name, bitd, step1, step2, W, opt)
Definition: dsp_init.c:270
ff_hevc_put_hevc_pel_pixels32_8_avx2
void ff_hevc_put_hevc_pel_pixels32_8_avx2(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
SAO_EDGE_INIT
#define SAO_EDGE_INIT(bitd, opt)
Definition: dsp_init.c:778
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:109
ff_hevc_put_hevc_pel_pixels64_8_avx2
void ff_hevc_put_hevc_pel_pixels64_8_avx2(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
bit_depth
static void bit_depth(AudioStatsContext *s, const uint64_t *const mask, uint8_t *depth)
Definition: af_astats.c:246
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:56
ff_hevc_put_hevc_qpel_h8_8_avx512icl
void ff_hevc_put_hevc_qpel_h8_8_avx512icl(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_add_residual_32_8_avx
void ff_hevc_add_residual_32_8_avx(uint8_t *dst, const int16_t *res, ptrdiff_t stride)
FW_PUT
#define FW_PUT(p, a, b, depth, opt)
Definition: dsp_init.c:94
ff_hevc_add_residual_32_10_avx2
void ff_hevc_add_residual_32_10_avx2(uint8_t *dst, const int16_t *res, ptrdiff_t stride)
ff_hevc_put_hevc_pel_pixels24_10_avx2
void ff_hevc_put_hevc_pel_pixels24_10_avx2(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_put_hevc_uni_pel_pixels48_8_avx2
void ff_hevc_put_hevc_uni_pel_pixels48_8_avx2(uint8_t *dst, ptrdiff_t dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
SAO_EDGE_FILTER_FUNCS
#define SAO_EDGE_FILTER_FUNCS(bitd, opt)
Definition: dsp_init.c:759
EXTERNAL_AVX2
#define EXTERNAL_AVX2(flags)
Definition: cpu.h:78
ff_hevc_add_residual_8_10_sse2
void ff_hevc_add_residual_8_10_sse2(uint8_t *dst, const int16_t *res, ptrdiff_t stride)
ff_hevc_put_hevc_uni_pel_pixels96_8_avx2
void ff_hevc_put_hevc_uni_pel_pixels96_8_avx2(uint8_t *dst, ptrdiff_t dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_put_hevc_uni_pel_pixels128_8_avx2
void ff_hevc_put_hevc_uni_pel_pixels128_8_avx2(uint8_t *dst, ptrdiff_t dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_add_residual_32_8_avx2
void ff_hevc_add_residual_32_8_avx2(uint8_t *dst, const int16_t *res, ptrdiff_t stride)
ff_hevc_add_residual_4_8_mmxext
void ff_hevc_add_residual_4_8_mmxext(uint8_t *dst, const int16_t *res, ptrdiff_t stride)
ff_hevc_add_residual_8_8_sse2
void ff_hevc_add_residual_8_8_sse2(uint8_t *dst, const int16_t *res, ptrdiff_t stride)
mc_rep_uni_func
#define mc_rep_uni_func(name, bitd, step, W, opt)
Definition: dsp_init.c:205
hevcdsp.h
ff_hevc_add_residual_16_10_sse2
void ff_hevc_add_residual_16_10_sse2(uint8_t *dst, const int16_t *res, ptrdiff_t stride)
ff_hevc_add_residual_16_10_avx2
void ff_hevc_add_residual_16_10_avx2(uint8_t *dst, const int16_t *res, ptrdiff_t stride)
asm.h
mc_rep_func
#define mc_rep_func(name, bitd, step, W, opt)
Definition: dsp_init.c:192
IDCT_DC_FUNCS
#define IDCT_DC_FUNCS(W, opt)
Definition: dsp_init.c:63
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not [excerpt truncated here]
Definition: undefined.txt:32
SAO_BAND_INIT
#define SAO_BAND_INIT(bitd, opt)
Definition: dsp_init.c:751
QPEL_LINKS
#define QPEL_LINKS(pointer, my, mx, fname, bitd, opt)
Definition: dsp_init.c:796
ff_hevc_add_residual_4_10_mmxext
void ff_hevc_add_residual_4_10_mmxext(uint8_t *dst, const int16_t *res, ptrdiff_t stride)
dsp.h
cpu.h
ff_hevc_put_hevc_qpel_h32_8_avx512icl
void ff_hevc_put_hevc_qpel_h32_8_avx512icl(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
LFL_FUNCS
#define LFL_FUNCS(type, depth, opt)
Definition: dsp_init.c:43
ff_hevc_put_hevc_bi_pel_pixels48_8_avx2
void ff_hevc_put_hevc_bi_pel_pixels48_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_put_hevc_uni_pel_pixels32_8_avx2
void ff_hevc_put_hevc_uni_pel_pixels32_8_avx2(uint8_t *dst, ptrdiff_t dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_put_hevc_qpel_h64_8_avx512icl
void ff_hevc_put_hevc_qpel_h64_8_avx512icl(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
HEVCDSPContext
Definition: dsp.h:47
ff_hevc_put_hevc_pel_pixels48_8_avx2
void ff_hevc_put_hevc_pel_pixels48_8_avx2(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
EXTERNAL_SSE2
#define EXTERNAL_SSE2(flags)
Definition: cpu.h:59
h2656dsp.h
ff_hevc_put_hevc_bi_pel_pixels32_8_avx2
void ff_hevc_put_hevc_bi_pel_pixels32_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_add_residual_8_8_avx
void ff_hevc_add_residual_8_8_avx(uint8_t *dst, const int16_t *res, ptrdiff_t stride)
ff_hevc_put_hevc_bi_pel_pixels32_10_avx2
void ff_hevc_put_hevc_bi_pel_pixels32_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_put_hevc_uni_pel_pixels64_8_avx2
void ff_hevc_put_hevc_uni_pel_pixels64_8_avx2(uint8_t *dst, ptrdiff_t dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_put_hevc_qpel_hv8_8_avx512icl
void ff_hevc_put_hevc_qpel_hv8_8_avx512icl(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
EXTERNAL_AVX
#define EXTERNAL_AVX(flags)
Definition: cpu.h:70
mc_rep_funcs
#define mc_rep_funcs(name, bitd, step, W, opt)
Definition: dsp_init.c:235
ff_hevc_put_hevc_bi_pel_pixels64_10_avx2
void ff_hevc_put_hevc_bi_pel_pixels64_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
EPEL_LINKS
#define EPEL_LINKS(pointer, my, mx, fname, bitd, opt)
Definition: dsp_init.c:786
ff_hevc_put_hevc_pel_pixels64_10_avx2
void ff_hevc_put_hevc_pel_pixels64_10_avx2(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
EXTERNAL_SSE4
#define EXTERNAL_SSE4(flags)
Definition: cpu.h:68
EXTERNAL_AVX512ICL
#define EXTERNAL_AVX512ICL(flags)
Definition: cpu.h:83
ff_hevc_add_residual_32_10_sse2
void ff_hevc_add_residual_32_10_sse2(uint8_t *dst, const int16_t *res, ptrdiff_t stride)
LFC_FUNCS
#define LFC_FUNCS(type, depth, opt)
Definition: dsp_init.c:39
ff_hevc_put_hevc_bi_pel_pixels24_10_avx2
void ff_hevc_put_hevc_bi_pel_pixels24_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_add_residual_32_8_sse2
void ff_hevc_add_residual_32_8_sse2(uint8_t *dst, const int16_t *res, ptrdiff_t stride)
ff_hevc_put_hevc_bi_pel_pixels16_10_avx2
void ff_hevc_put_hevc_bi_pel_pixels16_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
IDCT_FUNCS
#define IDCT_FUNCS(opt)
Definition: dsp_init.c:75
ff_hevc_put_hevc_bi_pel_pixels64_8_avx2
void ff_hevc_put_hevc_bi_pel_pixels64_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
EXTERNAL_SSSE3
#define EXTERNAL_SSSE3(flags)
Definition: cpu.h:65
ff_hevc_put_hevc_qpel_h4_8_avx512icl
void ff_hevc_put_hevc_qpel_h4_8_avx512icl(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
EXTERNAL_MMXEXT
#define EXTERNAL_MMXEXT(flags)
Definition: cpu.h:57
ff_hevc_add_residual_16_8_sse2
void ff_hevc_add_residual_16_8_sse2(uint8_t *dst, const int16_t *res, ptrdiff_t stride)
ff_hevc_put_hevc_pel_pixels32_10_avx2
void ff_hevc_put_hevc_pel_pixels32_10_avx2(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)