Go to the documentation of this file.
/*
 * FW_PUT(name, depth, opt)
 *
 * Token-pastes a static forwarding wrapper named
 * ff_vvc_put_<name>_<depth>_<opt>. The wrapper takes
 * (dst, src, srcstride, height, hf, vf, width) and hands every argument
 * unchanged to ff_h2656_put_<name>_<depth>_<opt>, inserting the constant
 * 2 * MAX_PB_SIZE as the callee's second argument (presumably the
 * destination stride in bytes -- confirm against the ff_h2656_put_*
 * prototypes).
 *
 * NOTE(review): no brace lines are visible around the forwarding call in
 * this listing (the embedded numbering skips 38, 40 and 41); check the
 * original file before editing.
 */
35 #define FW_PUT(name, depth, opt) \
36 static void ff_vvc_put_ ## name ## _ ## depth ## _##opt(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, \
37 int height, const int8_t *hf, const int8_t *vf, int width) \
39 ff_h2656_put_## name ## _ ## depth ## _##opt(dst, 2 * MAX_PB_SIZE, src, srcstride, height, hf, vf, width); \
/*
 * FW_PUT_TAP(fname, bitd, opt)
 *
 * Expands FW_PUT six times, appending the suffixes 4, 8, 16, 32, 64 and 128
 * to fname (presumably block widths in pixels -- confirm). bitd and opt are
 * forwarded untouched.
 *
 * The last expansion line ends in a continuation backslash, so whatever
 * line follows it is spliced into this macro.
 */
42 #define FW_PUT_TAP(fname, bitd, opt ) \
43 FW_PUT(fname##4, bitd, opt ) \
44 FW_PUT(fname##8, bitd, opt ) \
45 FW_PUT(fname##16, bitd, opt ) \
46 FW_PUT(fname##32, bitd, opt ) \
47 FW_PUT(fname##64, bitd, opt ) \
48 FW_PUT(fname##128, bitd, opt ) \
/*
 * FW_PUT_4TAP(fname, bitd, opt)
 *
 * Same set as FW_PUT_TAP plus one extra wrapper for the suffix 2, emitted
 * first.
 */
50 #define FW_PUT_4TAP(fname, bitd, opt) \
51 FW_PUT(fname ## 2, bitd, opt) \
52 FW_PUT_TAP(fname, bitd, opt)
/*
 * FW_PUT_4TAP_SSE4(bitd)
 *
 * Emits the FW_PUT_4TAP wrapper set (suffixes 2 through 128) with opt = sse4
 * for four base names: pixels, 4tap_h, 4tap_v and 4tap_hv.
 */
54 #define FW_PUT_4TAP_SSE4(bitd) \
55 FW_PUT_4TAP(pixels, bitd, sse4) \
56 FW_PUT_4TAP(4tap_h, bitd, sse4) \
57 FW_PUT_4TAP(4tap_v, bitd, sse4) \
58 FW_PUT_4TAP(4tap_hv, bitd, sse4)
/*
 * FW_PUT_8TAP_SSE4(bitd)
 *
 * Emits the FW_PUT_TAP wrapper set (suffixes 4 through 128) with opt = sse4
 * for 8tap_h, 8tap_v and 8tap_hv. There is no "pixels" entry here; the
 * pixels wrappers come from FW_PUT_4TAP_SSE4 only.
 */
60 #define FW_PUT_8TAP_SSE4(bitd) \
61 FW_PUT_TAP(8tap_h, bitd, sse4) \
62 FW_PUT_TAP(8tap_v, bitd, sse4) \
63 FW_PUT_TAP(8tap_hv, bitd, sse4)
/*
 * FW_PUT_SSE4(bitd)
 *
 * Convenience macro: the 4-tap set followed by the 8-tap set for one bit
 * depth.
 *
 * NOTE(review): no instantiation of this macro is visible in this listing
 * (the embedded numbering jumps from 67 to 73).
 */
65 #define FW_PUT_SSE4(bitd) \
66 FW_PUT_4TAP_SSE4(bitd) \
67 FW_PUT_8TAP_SSE4(bitd)
/*
 * FW_PUT_TAP_AVX2(n, bitd)
 *
 * Emits avx2 FW_PUT wrappers for the horizontal (h) and vertical (v)
 * variants of an n-tap filter, for the suffixes 32, 64 and 128 only.
 * `n ## tap_h32` pastes the tap count onto the name, e.g. n = 8 gives
 * 8tap_h32. No hv variants are emitted by this macro.
 */
73 #define FW_PUT_TAP_AVX2(n, bitd) \
74 FW_PUT(n ## tap_h32, bitd, avx2) \
75 FW_PUT(n ## tap_h64, bitd, avx2) \
76 FW_PUT(n ## tap_h128, bitd, avx2) \
77 FW_PUT(n ## tap_v32, bitd, avx2) \
78 FW_PUT(n ## tap_v64, bitd, avx2) \
79 FW_PUT(n ## tap_v128, bitd, avx2)
/*
 * FW_PUT_AVX2(bitd)
 *
 * Emits the avx2 wrappers for pixels32/64/128 and then the FW_PUT_TAP_AVX2
 * set for both 4-tap and 8-tap filters.
 *
 * The last expansion line ends in a continuation backslash, so whatever
 * line follows it is spliced into this macro.
 */
81 #define FW_PUT_AVX2(bitd) \
82 FW_PUT(pixels32, bitd, avx2) \
83 FW_PUT(pixels64, bitd, avx2) \
84 FW_PUT(pixels128, bitd, avx2) \
85 FW_PUT_TAP_AVX2(4, bitd) \
86 FW_PUT_TAP_AVX2(8, bitd) \
/*
 * FW_PUT_TAP_16BPC_AVX2(n, bitd)
 *
 * Emits the avx2 FW_PUT wrappers that FW_PUT_TAP_AVX2 does not cover:
 * the suffix-16 h, v and hv variants, plus the hv variants for 32, 64
 * and 128, for an n-tap filter.
 */
92 #define FW_PUT_TAP_16BPC_AVX2(n, bitd) \
93 FW_PUT(n ## tap_h16, bitd, avx2) \
94 FW_PUT(n ## tap_v16, bitd, avx2) \
95 FW_PUT(n ## tap_hv16, bitd, avx2) \
96 FW_PUT(n ## tap_hv32, bitd, avx2) \
97 FW_PUT(n ## tap_hv64, bitd, avx2) \
98 FW_PUT(n ## tap_hv128, bitd, avx2)
/*
 * FW_PUT_16BPC_AVX2(bitd)
 *
 * Emits the pixels16 wrapper and the FW_PUT_TAP_16BPC_AVX2 set for both
 * 4-tap and 8-tap filters.
 */
100 #define FW_PUT_16BPC_AVX2(bitd) \
101 FW_PUT(pixels16, bitd, avx2) \
102 FW_PUT_TAP_16BPC_AVX2(4, bitd) \
103 FW_PUT_TAP_16BPC_AVX2(8, bitd)
/* Instantiate the 16BPC avx2 wrappers for bit depths 10 and 12. */
105 FW_PUT_16BPC_AVX2(10)
106 FW_PUT_16BPC_AVX2(12)
/*
 * PEL_LINK(dst, C, W, idx1, idx2, name, D, opt)
 *
 * Fills one slot in each of two function-pointer tables:
 *   dst[C][W][idx1][idx2]      = ff_vvc_put_<name>_<D>_<opt>
 *   dst_uni[C][W][idx1][idx2]  = ff_h2656_put_uni_<name>_<D>_<opt>
 * `dst ## _uni` pastes onto the last token of the dst argument, so passing
 * c->inter.put addresses c->inter.put_uni. The first table receives the
 * ff_vvc_put_* wrapper, while the _uni table is pointed at the ff_h2656
 * function directly.
 *
 * The expansion is two separate statements, each with its own ';', and is
 * not wrapped in do { } while (0): use it only inside a braced block.
 * The last line ends in a continuation backslash, so whatever line follows
 * it is spliced into this macro.
 */
108 #define PEL_LINK(dst, C, W, idx1, idx2, name, D, opt) \
109 dst[C][W][idx1][idx2] = ff_vvc_put_## name ## _ ## D ## _##opt; \
110 dst ## _uni[C][W][idx1][idx2] = ff_h2656_put_uni_ ## name ## _ ## D ## _##opt; \
/*
 * MC_TAP_LINKS(pointer, C, my, mx, fname, bitd, opt)
 *
 * Applies PEL_LINK for table index W = 1..6, pairing each index with the
 * fname suffix 4, 8, 16, 32, 64, 128 respectively. my/mx are passed as the
 * idx1/idx2 table indices.
 *
 * Each PEL_LINK already ends in ';' and an extra ';' follows it here, so the
 * expansion is a run of statements (including empty ones), not a single
 * statement.
 */
112 #define MC_TAP_LINKS(pointer, C, my, mx, fname, bitd, opt ) \
113 PEL_LINK(pointer, C, 1, my , mx , fname##4 , bitd, opt ); \
114 PEL_LINK(pointer, C, 2, my , mx , fname##8 , bitd, opt ); \
115 PEL_LINK(pointer, C, 3, my , mx , fname##16, bitd, opt ); \
116 PEL_LINK(pointer, C, 4, my , mx , fname##32, bitd, opt ); \
117 PEL_LINK(pointer, C, 5, my , mx , fname##64, bitd, opt ); \
118 PEL_LINK(pointer, C, 6, my , mx , fname##128, bitd, opt );
/*
 * MC_8TAP_LINKS(pointer, my, mx, fname, bitd, opt)
 *
 * MC_TAP_LINKS with the first table index fixed to LUMA.
 */
120 #define MC_8TAP_LINKS(pointer, my, mx, fname, bitd, opt) \
121 MC_TAP_LINKS(pointer, LUMA, my, mx, fname, bitd, opt)
/*
 * MC_8TAP_LINKS_SSE4(bd)
 *
 * Links the sse4 LUMA entries of c->inter.put (and, through PEL_LINK, of
 * c->inter.put_uni) for bit depth bd. The (my, mx) index pair selects the
 * function family:
 *   (0, 0) pixels   (0, 1) 8tap_h   (1, 0) 8tap_v   (1, 1) 8tap_hv
 *
 * Requires a variable named `c` in scope at the expansion site.
 */
123 #define MC_8TAP_LINKS_SSE4(bd) \
124 MC_8TAP_LINKS(c->inter.put, 0, 0, pixels, bd, sse4); \
125 MC_8TAP_LINKS(c->inter.put, 0, 1, 8tap_h, bd, sse4); \
126 MC_8TAP_LINKS(c->inter.put, 1, 0, 8tap_v, bd, sse4); \
127 MC_8TAP_LINKS(c->inter.put, 1, 1, 8tap_hv, bd, sse4)
/*
 * MC_4TAP_LINKS(pointer, my, mx, fname, bitd, opt)
 *
 * CHROMA counterpart of MC_8TAP_LINKS: links table index W = 0 to the
 * suffix-2 function first, then defers to MC_TAP_LINKS for W = 1..6.
 *
 * The last line ends in a continuation backslash, so whatever line follows
 * it is spliced into this macro.
 */
129 #define MC_4TAP_LINKS(pointer, my, mx, fname, bitd, opt) \
130 PEL_LINK(pointer, CHROMA, 0, my , mx , fname##2 , bitd, opt ); \
131 MC_TAP_LINKS(pointer, CHROMA, my, mx, fname, bitd, opt) \
/*
 * MC_4TAP_LINKS_SSE4(bd)
 *
 * Links the sse4 CHROMA entries of c->inter.put for bit depth bd, using the
 * same (my, mx) layout as the 8-tap macro:
 *   (0, 0) pixels   (0, 1) 4tap_h   (1, 0) 4tap_v   (1, 1) 4tap_hv
 *
 * Requires a variable named `c` in scope at the expansion site.
 */
133 #define MC_4TAP_LINKS_SSE4(bd) \
134 MC_4TAP_LINKS(c->inter.put, 0, 0, pixels, bd, sse4); \
135 MC_4TAP_LINKS(c->inter.put, 0, 1, 4tap_h, bd, sse4); \
136 MC_4TAP_LINKS(c->inter.put, 1, 0, 4tap_v, bd, sse4); \
137 MC_4TAP_LINKS(c->inter.put, 1, 1, 4tap_hv, bd, sse4)
/*
 * MC_LINK_SSE4(bd)
 *
 * Runs the CHROMA (4-tap) links followed by the LUMA (8-tap) links. The
 * expansion is many statements, so use it only inside a braced block.
 */
139 #define MC_LINK_SSE4(bd) \
140 MC_4TAP_LINKS_SSE4(bd) \
141 MC_8TAP_LINKS_SSE4(bd)
/*
 * MC_TAP_LINKS_AVX2(C, tap, bd)
 *
 * Links the avx2 functions into c->inter.put for component C. Only table
 * indices W = 4, 5, 6 are touched, paired with the suffixes 32, 64, 128:
 *   (0, 0) pixels<N>   (0, 1) <tap>tap_h<N>   (1, 0) <tap>tap_v<N>
 * The (1, 1) slot is not assigned by this macro.
 *
 * `tap##tap_h32` pastes the tap argument onto the literal token tap_h32,
 * e.g. tap = 8 gives 8tap_h32.
 *
 * NOTE(review): the macro opens `do {` but no closing `} while (0)` line is
 * visible in this listing (the embedded numbering skips 153 and 154); the
 * final PEL_LINK line also ends in a continuation backslash. Check the
 * original file.
 */
143 #define MC_TAP_LINKS_AVX2(C,tap,bd) do { \
144 PEL_LINK(c->inter.put, C, 4, 0, 0, pixels32, bd, avx2) \
145 PEL_LINK(c->inter.put, C, 5, 0, 0, pixels64, bd, avx2) \
146 PEL_LINK(c->inter.put, C, 6, 0, 0, pixels128, bd, avx2) \
147 PEL_LINK(c->inter.put, C, 4, 0, 1, tap##tap_h32, bd, avx2) \
148 PEL_LINK(c->inter.put, C, 5, 0, 1, tap##tap_h64, bd, avx2) \
149 PEL_LINK(c->inter.put, C, 6, 0, 1, tap##tap_h128, bd, avx2) \
150 PEL_LINK(c->inter.put, C, 4, 1, 0, tap##tap_v32, bd, avx2) \
151 PEL_LINK(c->inter.put, C, 5, 1, 0, tap##tap_v64, bd, avx2) \
152 PEL_LINK(c->inter.put, C, 6, 1, 0, tap##tap_v128, bd, avx2) \
/*
 * MC_LINKS_AVX2(bd)
 *
 * Links LUMA with the 8-tap functions and CHROMA with the 4-tap functions.
 * The body is two invocations each followed by ';', so the macro expands to
 * more than one statement and already carries its own trailing semicolon.
 */
155 #define MC_LINKS_AVX2(bd) \
156 MC_TAP_LINKS_AVX2(LUMA, 8, bd); \
157 MC_TAP_LINKS_AVX2(CHROMA, 4, bd);
/*
 * MC_TAP_LINKS_16BPC_AVX2(C, tap, bd)
 *
 * Links the avx2 functions generated by FW_PUT_16BPC_AVX2 into c->inter.put
 * for component C:
 *   W = 3 (suffix 16): (0, 0) pixels16, (0, 1) h16, (1, 0) v16, (1, 1) hv16
 *   W = 4, 5, 6 (suffixes 32, 64, 128): only the (1, 1) hv slot
 *
 * NOTE(review): the macro opens `do {` but no closing `} while (0)` line is
 * visible in this listing (the embedded numbering skips 167 and 168); the
 * final PEL_LINK line also ends in a continuation backslash. Check the
 * original file.
 */
159 #define MC_TAP_LINKS_16BPC_AVX2(C, tap, bd) do { \
160 PEL_LINK(c->inter.put, C, 3, 0, 0, pixels16, bd, avx2) \
161 PEL_LINK(c->inter.put, C, 3, 0, 1, tap##tap_h16, bd, avx2) \
162 PEL_LINK(c->inter.put, C, 3, 1, 0, tap##tap_v16, bd, avx2) \
163 PEL_LINK(c->inter.put, C, 3, 1, 1, tap##tap_hv16, bd, avx2) \
164 PEL_LINK(c->inter.put, C, 4, 1, 1, tap##tap_hv32, bd, avx2) \
165 PEL_LINK(c->inter.put, C, 5, 1, 1, tap##tap_hv64, bd, avx2) \
166 PEL_LINK(c->inter.put, C, 6, 1, 1, tap##tap_hv128, bd, avx2) \
/*
 * MC_LINKS_16BPC_AVX2(bd)
 *
 * Links LUMA with the 8-tap functions and CHROMA with the 4-tap functions.
 * Like MC_LINKS_AVX2, it expands to more than one statement and already
 * carries its own trailing semicolon.
 */
169 #define MC_LINKS_16BPC_AVX2(bd) \
170 MC_TAP_LINKS_16BPC_AVX2(LUMA, 8, bd); \
171 MC_TAP_LINKS_16BPC_AVX2(CHROMA, 4, bd);
/*
 * Name-building helpers (the two differ only in case and in the "bpc" tag):
 *   bf(fn, bd, opt)  -> fn_<bd>_<opt>        e.g. bf(avg, 10, avx2) = avg_10_avx2
 *   BF(fn, bpc, opt) -> fn_<bpc>bpc_<opt>    e.g. BF(ff_vvc_avg, 16, avx2)
 *                                                 = ff_vvc_avg_16bpc_avx2
 */
173 #define bf(fn, bd, opt) fn##_##bd##_##opt
174 #define BF(fn, bpc, opt) fn##_##bpc##bpc_##opt
/*
 * AVG_BPC_FUNC(bpc, opt)
 *
 * Declares (does not define) two non-static functions:
 *   ff_vvc_avg_<bpc>bpc_<opt>(dst, dst_stride, src0, src1,
 *                             width, height, pixel_max)
 *   ff_vvc_w_avg_<bpc>bpc_<opt>(dst, dst_stride, src0, src1, width, height,
 *                               denom, w0, w1, o0, o1, pixel_max)
 * All scalar arguments are intptr_t. The definitions are not in this file
 * (presumably hand-written assembly -- confirm).
 */
176 #define AVG_BPC_FUNC(bpc, opt) \
177 void BF(ff_vvc_avg, bpc, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
178 const int16_t *src0, const int16_t *src1, intptr_t width, intptr_t height, intptr_t pixel_max); \
179 void BF(ff_vvc_w_avg, bpc, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
180 const int16_t *src0, const int16_t *src1, intptr_t width, intptr_t height, \
181 intptr_t denom, intptr_t w0, intptr_t w1, intptr_t o0, intptr_t o1, intptr_t pixel_max);
/*
 * AVG_FUNCS(bpc, bd, opt)
 *
 * Defines two static adapters, avg_<bd>_<opt> and w_avg_<bd>_<opt>, that
 * take int arguments and forward them to the ff_vvc_avg / ff_vvc_w_avg
 * function for <bpc>, appending (1 << bd) - 1 as the trailing pixel_max
 * argument (the largest value representable in bd bits).
 *
 * bd is not parenthesised inside (1 << bd), so pass a plain integer literal.
 *
 * NOTE(review): no brace lines are visible around the two function bodies in
 * this listing (the embedded numbering skips 186, 188, 192, 195 and 196),
 * and the last visible line ends in a continuation backslash. Check the
 * original file.
 */
183 #define AVG_FUNCS(bpc, bd, opt) \
184 static void bf(avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
185 const int16_t *src0, const int16_t *src1, int width, int height) \
187 BF(ff_vvc_avg, bpc, opt)(dst, dst_stride, src0, src1, width, height, (1 << bd) - 1); \
189 static void bf(w_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
190 const int16_t *src0, const int16_t *src1, int width, int height, \
191 int denom, int w0, int w1, int o0, int o1) \
193 BF(ff_vvc_w_avg, bpc, opt)(dst, dst_stride, src0, src1, width, height, \
194 denom, w0, w1, o0, o1, (1 << bd) - 1); \
/* Prototypes for the 8bpc and 16bpc avx2 averaging functions. */
197 AVG_BPC_FUNC(8, avx2)
198 AVG_BPC_FUNC(16, avx2)
/*
 * Per-bit-depth adapters. Bit depths 10 and 12 both forward to the 16bpc
 * functions and differ only in the pixel_max value they pass.
 */
200 AVG_FUNCS(8, 8, avx2)
201 AVG_FUNCS(16, 10, avx2)
202 AVG_FUNCS(16, 12, avx2)
204 #define AVG_INIT(bd, opt) do { \
205 c->inter.avg = bf(avg, bd, opt); \
206 c->inter.w_avg = bf(w_avg, bd, opt); \
222 }
else if (bd == 10) {
228 MC_LINKS_16BPC_AVX2(10);
230 }
else if (bd == 12) {
236 MC_LINKS_16BPC_AVX2(12);
void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
#define EXTERNAL_AVX2_FAST(flags)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
static atomic_int cpu_flags
#define EXTERNAL_AVX2(flags)
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, and accessing outside allocated space. Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can lead, and in some cases has led, to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c…
#define EXTERNAL_SSE4(flags)