/* Build a symbol name by pasting the three arguments together with
 * underscores: bf(ff_vvc_avg, 8, avx2) -> ff_vvc_avg_8_avx2. */
36 #define bf(fn, bd, opt) fn##_##bd##_##opt
/* Like bf(), but the middle argument gets a literal "bpc" suffix:
 * BF(ff_vvc_alf_classify, 16, avx2) -> ff_vvc_alf_classify_16bpc_avx2. */
37 #define BF(fn, bpc, opt) fn##_##bpc##bpc_##opt
/* Declare the four externally defined routines
 *   ff_vvc_dmvr_<bd>_<opt>, ff_vvc_dmvr_h_<bd>_<opt>,
 *   ff_vvc_dmvr_v_<bd>_<opt>, ff_vvc_dmvr_hv_<bd>_<opt>
 * which all share one parameter list.
 * NOTE: the last prototype line ends with a continuation backslash, so the
 * line that follows the macro is spliced into it and must stay empty. */
39 #define DMVR_PROTOTYPES(bd, opt) \
40 void ff_vvc_dmvr_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, \
41 int height, intptr_t mx, intptr_t my, int width); \
42 void ff_vvc_dmvr_h_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, \
43 int height, intptr_t mx, intptr_t my, int width); \
44 void ff_vvc_dmvr_v_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, \
45 int height, intptr_t mx, intptr_t my, int width); \
46 void ff_vvc_dmvr_hv_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, \
47 int height, intptr_t mx, intptr_t my, int width); \
/* Statement macro: declare ff_vvc_apply_bdof_<BD>_<OPT> at block scope and
 * store it in c->inter.apply_bdof. A variable named `c` must be in scope
 * where the macro is expanded.
 * NOTE(review): the tail of the parameter list and the closing of the
 * do { ... } block are not part of this excerpt. */
53 #define OF_INIT(BD, OPT) do { \
54 void ff_vvc_apply_bdof_## BD ## _ ## OPT(uint8_t *dst, ptrdiff_t dst_stride, \
55 const int16_t *src0, const int16_t *src1, \
57 c->inter.apply_bdof = ff_vvc_apply_bdof_## BD ##_## OPT; \
/* Declare the two ALF classification kernels for one bpc/opt pair:
 *   ff_vvc_alf_classify_grad_<bpc>bpc_<opt>  writes into gradient_sum
 *   ff_vvc_alf_classify_<bpc>bpc_<opt>       reads gradient_sum, writes
 *                                            class_idx and transpose_idx
 * Size-like arguments are intptr_t here, whereas the C wrapper produced by
 * ALF_FUNCS passes plain ints.
 * NOTE: the last prototype line ends with a continuation backslash. */
60 #define ALF_BPC_PROTOTYPES(bpc, opt) \
61 void BF(ff_vvc_alf_classify_grad, bpc, opt)(int *gradient_sum, \
62 const uint8_t *src, ptrdiff_t src_stride, intptr_t width, intptr_t height, intptr_t vb_pos); \
63 void BF(ff_vvc_alf_classify, bpc, opt)(int *class_idx, int *transpose_idx, const int *gradient_sum, \
64 intptr_t width, intptr_t height, intptr_t vb_pos, intptr_t bit_depth); \
/* Instantiate the prototypes for the 8bpc and 16bpc AVX2 kernels. */
66 ALF_BPC_PROTOTYPES(8, avx2)
67 ALF_BPC_PROTOTYPES(16, avx2)
/* Define a static forwarding wrapper vvc_put_<name>_<depth>_<opt>() that
 * calls ff_h2656_put_<name>_<depth>_<opt>() with the same arguments plus a
 * constant `2 * MAX_PB_SIZE` inserted right after dst.
 * NOTE(review): that constant is presumably the destination stride in
 * bytes -- confirm against the ff_h2656_put_* prototypes. The brace lines of
 * the wrapper body are not part of this excerpt. */
70 #define FW_PUT(name, depth, opt) \
71 static void vvc_put_ ## name ## _ ## depth ## _##opt(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, \
72 int height, const int8_t *hf, const int8_t *vf, int width) \
74 ff_h2656_put_## name ## _ ## depth ## _##opt(dst, 2 * MAX_PB_SIZE, src, srcstride, height, hf, vf, width); \
77 #if HAVE_SSE4_EXTERNAL
/* Emit FW_PUT wrappers for fname with the suffixes 4, 8, 16, 32, 64 and 128
 * (e.g. fname = 8tap_h -> 8tap_h4 ... 8tap_h128).
 * NOTE: the last line ends with a continuation backslash. */
78 #define FW_PUT_TAP(fname, bitd, opt ) \
79 FW_PUT(fname##4, bitd, opt ) \
80 FW_PUT(fname##8, bitd, opt ) \
81 FW_PUT(fname##16, bitd, opt ) \
82 FW_PUT(fname##32, bitd, opt ) \
83 FW_PUT(fname##64, bitd, opt ) \
84 FW_PUT(fname##128, bitd, opt ) \
/* Same as FW_PUT_TAP, with one extra wrapper for the suffix 2. */
86 #define FW_PUT_4TAP(fname, bitd, opt) \
87 FW_PUT(fname ## 2, bitd, opt) \
88 FW_PUT_TAP(fname, bitd, opt)
/* SSE4 wrappers for the pixels, 4tap_h, 4tap_v and 4tap_hv families
 * (suffixes 2 through 128 each) at bit depth bitd. */
90 #define FW_PUT_4TAP_SSE4(bitd) \
91 FW_PUT_4TAP(pixels, bitd, sse4) \
92 FW_PUT_4TAP(4tap_h, bitd, sse4) \
93 FW_PUT_4TAP(4tap_v, bitd, sse4) \
94 FW_PUT_4TAP(4tap_hv, bitd, sse4)
/* SSE4 wrappers for the 8tap_h, 8tap_v and 8tap_hv families (suffixes 4
 * through 128). No "pixels" wrappers are emitted here; FW_PUT_4TAP_SSE4
 * already generates them. */
96 #define FW_PUT_8TAP_SSE4(bitd) \
97 FW_PUT_TAP(8tap_h, bitd, sse4) \
98 FW_PUT_TAP(8tap_v, bitd, sse4) \
99 FW_PUT_TAP(8tap_hv, bitd, sse4)
/* All SSE4 wrappers (4-tap set followed by 8-tap set) for one bit depth. */
101 #define FW_PUT_SSE4(bitd) \
102 FW_PUT_4TAP_SSE4(bitd) \
103 FW_PUT_8TAP_SSE4(bitd)
110 #if HAVE_AVX2_EXTERNAL
/* AVX2 wrappers for <n>tap_h and <n>tap_v with the suffixes 32, 64 and 128.
 * n is pasted directly onto "tap_...", so n = 8 yields 8tap_h32 etc. */
111 #define FW_PUT_TAP_AVX2(n, bitd) \
112 FW_PUT(n ## tap_h32, bitd, avx2) \
113 FW_PUT(n ## tap_h64, bitd, avx2) \
114 FW_PUT(n ## tap_h128, bitd, avx2) \
115 FW_PUT(n ## tap_v32, bitd, avx2) \
116 FW_PUT(n ## tap_v64, bitd, avx2) \
117 FW_PUT(n ## tap_v128, bitd, avx2)
/* AVX2 wrappers for pixels32/64/128 plus the 4-tap and 8-tap h/v sets from
 * FW_PUT_TAP_AVX2.
 * NOTE: the last line ends with a continuation backslash. */
119 #define FW_PUT_AVX2(bitd) \
120 FW_PUT(pixels32, bitd, avx2) \
121 FW_PUT(pixels64, bitd, avx2) \
122 FW_PUT(pixels128, bitd, avx2) \
123 FW_PUT_TAP_AVX2(4, bitd) \
124 FW_PUT_TAP_AVX2(8, bitd) \
/* Additional AVX2 wrappers: <n>tap_h16, <n>tap_v16, and <n>tap_hv with the
 * suffixes 16, 32, 64 and 128. */
130 #define FW_PUT_TAP_16BPC_AVX2(n, bitd) \
131 FW_PUT(n ## tap_h16, bitd, avx2) \
132 FW_PUT(n ## tap_v16, bitd, avx2) \
133 FW_PUT(n ## tap_hv16, bitd, avx2) \
134 FW_PUT(n ## tap_hv32, bitd, avx2) \
135 FW_PUT(n ## tap_hv64, bitd, avx2) \
136 FW_PUT(n ## tap_hv128, bitd, avx2)
/* pixels16 wrapper plus the FW_PUT_TAP_16BPC_AVX2 sets for 4 and 8 taps. */
138 #define FW_PUT_16BPC_AVX2(bitd) \
139 FW_PUT(pixels16, bitd, avx2) \
140 FW_PUT_TAP_16BPC_AVX2(4, bitd) \
141 FW_PUT_TAP_16BPC_AVX2(8, bitd)
/* Generate those wrappers for bit depths 10 and 12. */
143 FW_PUT_16BPC_AVX2(10)
144 FW_PUT_16BPC_AVX2(12)
/* Define a static wrapper vvc_alf_classify_<bd>_<opt>() that runs the two
 * kernels declared by ALF_BPC_PROTOTYPES in sequence:
 *   1. ff_vvc_alf_classify_grad_<bpc>bpc_<opt> fills gradient_tmp from src;
 *   2. ff_vvc_alf_classify_<bpc>bpc_<opt> reads gradient_tmp and fills
 *      class_idx / transpose_idx, receiving bd as its final argument.
 * The int width/height/vb_pos arguments are implicitly converted to the
 * intptr_t parameters of the kernels.
 * NOTE(review): the brace lines of the wrapper body are not part of this
 * excerpt. */
146 #define ALF_FUNCS(bpc, bd, opt) \
147 static void bf(vvc_alf_classify, bd, opt)(int *class_idx, int *transpose_idx, \
148 const uint8_t *src, ptrdiff_t src_stride, int width, int height, int vb_pos, int *gradient_tmp) \
150 BF(ff_vvc_alf_classify_grad, bpc, opt)(gradient_tmp, src, src_stride, width, height, vb_pos); \
151 BF(ff_vvc_alf_classify, bpc, opt)(class_idx, transpose_idx, gradient_tmp, width, height, vb_pos, bd); \
/* Bit depth 8 uses the 8bpc kernels; bit depths 10 and 12 both use the
 * 16bpc kernels and differ only in the bd value passed through. */
154 ALF_FUNCS(8, 8, avx2)
155 ALF_FUNCS(16, 10, avx2)
156 ALF_FUNCS(16, 12, avx2)
/* Declare the SAO band and edge filter routines for one width/bit-depth/opt
 * combination: ff_vvc_sao_band_filter_<wd>_<bitd>_<opt> and
 * ff_vvc_sao_edge_filter_<wd>_<bitd>_<opt>.
 * The band filter takes separate dst and src strides; the edge filter takes
 * only a dst stride.
 * NOTE: the last prototype line ends with a continuation backslash. */
160 #define SAO_FILTER_FUNC(wd, bitd, opt) \
161 void ff_vvc_sao_band_filter_##wd##_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
162 const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
163 void ff_vvc_sao_edge_filter_##wd##_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
164 const int16_t *sao_offset_val, int eo, int width, int height); \
/* Declare the SAO band/edge prototypes for the nine widths
 * 8, 16, 32, 48, 64, 80, 96, 112 and 128.
 * NOTE: the last line ends with a continuation backslash. */
166 #define SAO_FILTER_FUNCS(bitd, opt) \
167 SAO_FILTER_FUNC(8, bitd, opt) \
168 SAO_FILTER_FUNC(16, bitd, opt) \
169 SAO_FILTER_FUNC(32, bitd, opt) \
170 SAO_FILTER_FUNC(48, bitd, opt) \
171 SAO_FILTER_FUNC(64, bitd, opt) \
172 SAO_FILTER_FUNC(80, bitd, opt) \
173 SAO_FILTER_FUNC(96, bitd, opt) \
174 SAO_FILTER_FUNC(112, bitd, opt) \
175 SAO_FILTER_FUNC(128, bitd, opt) \
/* AVX2 prototypes for bit depths 8, 10 and 12. */
177 SAO_FILTER_FUNCS(8, avx2)
178 SAO_FILTER_FUNCS(10, avx2)
179 SAO_FILTER_FUNCS(12, avx2)
/* Statement macro: fill c->sao.<type>_filter[0..8] with the routines for
 * widths 8, 16, 32, 48, 64, 80, 96, 112 and 128, in that order (the same
 * width list as SAO_FILTER_FUNCS). `type` is pasted into both the member
 * name and the function name, e.g. band -> band_filter[] and
 * ff_vvc_sao_band_filter_*.
 * NOTE(review): the closing of the do { ... } block is not part of this
 * excerpt. */
181 #define SAO_FILTER_INIT(type, bitd, opt) do { \
182 c->sao.type##_filter[0] = ff_vvc_sao_##type##_filter_8_##bitd##_##opt; \
183 c->sao.type##_filter[1] = ff_vvc_sao_##type##_filter_16_##bitd##_##opt; \
184 c->sao.type##_filter[2] = ff_vvc_sao_##type##_filter_32_##bitd##_##opt; \
185 c->sao.type##_filter[3] = ff_vvc_sao_##type##_filter_48_##bitd##_##opt; \
186 c->sao.type##_filter[4] = ff_vvc_sao_##type##_filter_64_##bitd##_##opt; \
187 c->sao.type##_filter[5] = ff_vvc_sao_##type##_filter_80_##bitd##_##opt; \
188 c->sao.type##_filter[6] = ff_vvc_sao_##type##_filter_96_##bitd##_##opt; \
189 c->sao.type##_filter[7] = ff_vvc_sao_##type##_filter_112_##bitd##_##opt; \
190 c->sao.type##_filter[8] = ff_vvc_sao_##type##_filter_128_##bitd##_##opt; \
/* Statement macro: install both the band and the edge SAO filter tables via
 * SAO_FILTER_INIT.
 * NOTE(review): the closing of the do { ... } block is not part of this
 * excerpt. */
193 #define SAO_INIT(bitd, opt) do { \
194 SAO_FILTER_INIT(band, bitd, opt); \
195 SAO_FILTER_INIT(edge, bitd, opt); \
/* Statement macro: declare ff_vvc_avg_<bd>_<opt> and ff_vvc_w_avg_<bd>_<opt>
 * at block scope, then store them in c->inter.avg and c->inter.w_avg.
 * The weighted variant takes four extra ints: denom, w0, w1 and o.
 * NOTE(review): the closing of the do { ... } block is not part of this
 * excerpt. */
198 #define AVG_INIT(bd, opt) do { \
199 void bf(ff_vvc_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
200 const int16_t *src0, const int16_t *src1, int width, int height);\
201 void bf(ff_vvc_w_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
202 const int16_t *src0, const int16_t *src1, int width, int height, \
203 int denom, int w0, int w1, int o); \
204 c->inter.avg = bf(ff_vvc_avg, bd, opt); \
205 c->inter.w_avg = bf(ff_vvc_w_avg, bd, opt); \
/* Statement macro: fill the 2x2 table c->inter.dmvr with the AVX2 routines
 * declared by DMVR_PROTOTYPES:
 *   [0][0] plain, [0][1] _h, [1][0] _v, [1][1] _hv.
 * The "avx2" suffix is hard-coded; only the bit depth is a parameter.
 * NOTE(review): the closing of the do { ... } block is not part of this
 * excerpt. */
208 #define DMVR_INIT(bd) do { \
209 c->inter.dmvr[0][0] = ff_vvc_dmvr_##bd##_avx2; \
210 c->inter.dmvr[0][1] = ff_vvc_dmvr_h_##bd##_avx2; \
211 c->inter.dmvr[1][0] = ff_vvc_dmvr_v_##bd##_avx2; \
212 c->inter.dmvr[1][1] = ff_vvc_dmvr_hv_##bd##_avx2; \
/* Install one pair of function pointers at index [C][W][idx1][idx2]:
 *   dst      <- vvc_put_<name>_<D>_<opt>          (static wrapper from FW_PUT)
 *   dst_uni  <- ff_h2656_put_uni_<name>_<D>_<opt> (used directly, no wrapper)
 * Each assignment carries its own ';', so callers may omit one.
 * NOTE: the last line ends with a continuation backslash. */
215 #define PEL_LINK(dst, C, W, idx1, idx2, name, D, opt) \
216 dst[C][W][idx1][idx2] = vvc_put_## name ## _ ## D ## _##opt; \
217 dst ## _uni[C][W][idx1][idx2] = ff_h2656_put_uni_ ## name ## _ ## D ## _##opt; \
/* Link the six variants of fname into table indices W = 1..6, which pair
 * with the suffixes 4, 8, 16, 32, 64 and 128 respectively. */
219 #define MC_TAP_LINKS(pointer, C, my, mx, fname, bitd, opt ) \
220 PEL_LINK(pointer, C, 1, my , mx , fname##4 , bitd, opt ); \
221 PEL_LINK(pointer, C, 2, my , mx , fname##8 , bitd, opt ); \
222 PEL_LINK(pointer, C, 3, my , mx , fname##16, bitd, opt ); \
223 PEL_LINK(pointer, C, 4, my , mx , fname##32, bitd, opt ); \
224 PEL_LINK(pointer, C, 5, my , mx , fname##64, bitd, opt ); \
225 PEL_LINK(pointer, C, 6, my , mx , fname##128, bitd, opt );
/* MC_TAP_LINKS with the component index fixed to LUMA. */
227 #define MC_8TAP_LINKS(pointer, my, mx, fname, bitd, opt) \
228 MC_TAP_LINKS(pointer, LUMA, my, mx, fname, bitd, opt)
/* Populate the LUMA entries of c->inter.put (and c->inter.put_uni, via
 * PEL_LINK) with SSE4 routines. The [my][mx] pair selects the family:
 *   [0][0] pixels, [0][1] 8tap_h, [1][0] 8tap_v, [1][1] 8tap_hv. */
230 #define MC_8TAP_LINKS_SSE4(bd) \
231 MC_8TAP_LINKS(c->inter.put, 0, 0, pixels, bd, sse4); \
232 MC_8TAP_LINKS(c->inter.put, 0, 1, 8tap_h, bd, sse4); \
233 MC_8TAP_LINKS(c->inter.put, 1, 0, 8tap_v, bd, sse4); \
234 MC_8TAP_LINKS(c->inter.put, 1, 1, 8tap_hv, bd, sse4)
/* CHROMA counterpart of MC_8TAP_LINKS: additionally links the suffix-2
 * variant at W = 0 before delegating W = 1..6 to MC_TAP_LINKS.
 * NOTE: the last line ends with a continuation backslash. */
236 #define MC_4TAP_LINKS(pointer, my, mx, fname, bitd, opt) \
237 PEL_LINK(pointer, CHROMA, 0, my , mx , fname##2 , bitd, opt ); \
238 MC_TAP_LINKS(pointer, CHROMA, my, mx, fname, bitd, opt) \
/* Populate the CHROMA entries of c->inter.put (and c->inter.put_uni, via
 * PEL_LINK) with SSE4 routines. The [my][mx] pair selects the family:
 *   [0][0] pixels, [0][1] 4tap_h, [1][0] 4tap_v, [1][1] 4tap_hv. */
240 #define MC_4TAP_LINKS_SSE4(bd) \
241 MC_4TAP_LINKS(c->inter.put, 0, 0, pixels, bd, sse4); \
242 MC_4TAP_LINKS(c->inter.put, 0, 1, 4tap_h, bd, sse4); \
243 MC_4TAP_LINKS(c->inter.put, 1, 0, 4tap_v, bd, sse4); \
244 MC_4TAP_LINKS(c->inter.put, 1, 1, 4tap_hv, bd, sse4)
/* Install every SSE4 put routine for one bit depth: the chroma (4-tap) set
 * first, then the luma (8-tap) set. */
246 #define MC_LINK_SSE4(bd) \
247 MC_4TAP_LINKS_SSE4(bd) \
248 MC_8TAP_LINKS_SSE4(bd)
/* Statement macro: point the W = 4, 5, 6 entries (suffixes 32, 64, 128) of
 * c->inter.put at AVX2 routines for [0][0] pixels, [0][1] <tap>tap_h and
 * [1][0] <tap>tap_v. The [1][1] (hv) entries are not touched here.
 * In `tap##tap_h32` only the left operand is the macro parameter; `tap_h32`
 * is a separate identifier token, so tap = 8 produces 8tap_h32.
 * PEL_LINK supplies its own semicolons.
 * NOTE(review): the closing of the do { ... } block is not part of this
 * excerpt. */
250 #define MC_TAP_LINKS_AVX2(C,tap,bd) do { \
251 PEL_LINK(c->inter.put, C, 4, 0, 0, pixels32, bd, avx2) \
252 PEL_LINK(c->inter.put, C, 5, 0, 0, pixels64, bd, avx2) \
253 PEL_LINK(c->inter.put, C, 6, 0, 0, pixels128, bd, avx2) \
254 PEL_LINK(c->inter.put, C, 4, 0, 1, tap##tap_h32, bd, avx2) \
255 PEL_LINK(c->inter.put, C, 5, 0, 1, tap##tap_h64, bd, avx2) \
256 PEL_LINK(c->inter.put, C, 6, 0, 1, tap##tap_h128, bd, avx2) \
257 PEL_LINK(c->inter.put, C, 4, 1, 0, tap##tap_v32, bd, avx2) \
258 PEL_LINK(c->inter.put, C, 5, 1, 0, tap##tap_v64, bd, avx2) \
259 PEL_LINK(c->inter.put, C, 6, 1, 0, tap##tap_v128, bd, avx2) \
/* Apply MC_TAP_LINKS_AVX2 to LUMA with 8 taps and to CHROMA with 4 taps.
 * The expansion already ends in ';', so a call written as
 * MC_LINKS_AVX2(bd); yields an extra empty statement. */
262 #define MC_LINKS_AVX2(bd) \
263 MC_TAP_LINKS_AVX2(LUMA, 8, bd); \
264 MC_TAP_LINKS_AVX2(CHROMA, 4, bd);
/* Statement macro: install the AVX2 routines generated by
 * FW_PUT_16BPC_AVX2. W = 3 (suffix 16) is filled for all four [my][mx]
 * combinations; W = 4, 5, 6 (suffixes 32, 64, 128) are filled for [1][1]
 * (hv) only.
 * NOTE(review): the closing of the do { ... } block is not part of this
 * excerpt. */
266 #define MC_TAP_LINKS_16BPC_AVX2(C, tap, bd) do { \
267 PEL_LINK(c->inter.put, C, 3, 0, 0, pixels16, bd, avx2) \
268 PEL_LINK(c->inter.put, C, 3, 0, 1, tap##tap_h16, bd, avx2) \
269 PEL_LINK(c->inter.put, C, 3, 1, 0, tap##tap_v16, bd, avx2) \
270 PEL_LINK(c->inter.put, C, 3, 1, 1, tap##tap_hv16, bd, avx2) \
271 PEL_LINK(c->inter.put, C, 4, 1, 1, tap##tap_hv32, bd, avx2) \
272 PEL_LINK(c->inter.put, C, 5, 1, 1, tap##tap_hv64, bd, avx2) \
273 PEL_LINK(c->inter.put, C, 6, 1, 1, tap##tap_hv128, bd, avx2) \
/* Apply MC_TAP_LINKS_16BPC_AVX2 to LUMA with 8 taps and to CHROMA with
 * 4 taps. The expansion already ends in ';'. */
276 #define MC_LINKS_16BPC_AVX2(bd) \
277 MC_TAP_LINKS_16BPC_AVX2(LUMA, 8, bd); \
278 MC_TAP_LINKS_16BPC_AVX2(CHROMA, 4, bd);
/* Prototype of the AVX2 routine that SAD_INIT stores in c->inter.sad.
 * It takes two int16_t sample pointers, the ints dx and dy, and the block
 * dimensions, and returns an int.
 * NOTE(review): the exact meaning of dx/dy and of the returned value is
 * defined by the implementation, which is not part of this file. */
280 int ff_vvc_sad_avx2(
const int16_t *
src0,
const int16_t *
src1,
int dx,
int dy,
int block_w,
int block_h);
/* Store ff_vvc_sad_avx2 in c->inter.sad. The expansion has no trailing ';',
 * so the call site must provide it. */
281 #define SAD_INIT() c->inter.sad = ff_vvc_sad_avx2
/* Statement macro: declare ff_vvc_alf_filter_luma_<bd>_<opt> and
 * ff_vvc_alf_filter_chroma_<bd>_<opt> at block scope, then install them as
 * c->alf.filter[LUMA] and c->alf.filter[CHROMA]. c->alf.classify is set to
 * vvc_alf_classify_<bd>_<opt>, the static wrapper generated by ALF_FUNCS,
 * so a matching ALF_FUNCS instantiation must exist for the same bd/opt.
 * NOTE(review): the closing of the do { ... } block is not part of this
 * excerpt. */
283 #define ALF_INIT(bd, opt) do { \
284 void bf(ff_vvc_alf_filter_luma, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
285 const uint8_t *src, ptrdiff_t src_stride, int width, int height, \
286 const int16_t *filter, const int16_t *clip, int vb_pos); \
287 void bf(ff_vvc_alf_filter_chroma, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
288 const uint8_t *src, ptrdiff_t src_stride, int width, int height, \
289 const int16_t *filter, const int16_t *clip, int vb_pos); \
290 c->alf.filter[LUMA] = bf(ff_vvc_alf_filter_luma, bd, opt); \
291 c->alf.filter[CHROMA] = bf(ff_vvc_alf_filter_chroma, bd, opt); \
292 c->alf.classify = bf(vvc_alf_classify, bd, opt); \
298 #endif // ARCH_X86_64
307 #if HAVE_SSE4_EXTERNAL
312 #if HAVE_AVX2_EXTERNAL
328 #if HAVE_SSE4_EXTERNAL
333 #if HAVE_AVX2_EXTERNAL
339 MC_LINKS_16BPC_AVX2(10);
350 #if HAVE_SSE4_EXTERNAL
355 #if HAVE_AVX2_EXTERNAL
361 MC_LINKS_16BPC_AVX2(12);