static inline uint64_t avg2(uint64_t a, uint64_t b)
{
    return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
}
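/* Illustrative reference (assumed helper, not part of the original file):
 * each byte lane of avg2(a, b) is the rounded-up average of the matching
 * byte lanes of a and b; masking with BYTE_VEC(0xfe) before the shift keeps
 * bit 0 of one lane from leaking into the lane below it. */
static inline uint8_t avg2_ref(uint8_t a, uint8_t b)
{
    return (uint8_t) ((a + b + 1) >> 1);    /* one byte lane of avg2() */
}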
static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
{
    uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
                + ((l2 & ~BYTE_VEC(0x03)) >> 2)
                + ((l3 & ~BYTE_VEC(0x03)) >> 2)
                + ((l4 & ~BYTE_VEC(0x03)) >> 2);
    uint64_t r2 = ((  (l1 & BYTE_VEC(0x03))
                    + (l2 & BYTE_VEC(0x03))
                    + (l3 & BYTE_VEC(0x03))
                    + (l4 & BYTE_VEC(0x03))
                    + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
    return r1 + r2;
}
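/* Illustrative reference (assumed helper, not part of the original file):
 * avg4() computes, independently in each byte lane, the rounded average
 * (l1 + l2 + l3 + l4 + 2) >> 2.  The top six bits of each byte are summed
 * carry-free in r1; the low two bits plus the rounding constant 2 are
 * handled in r2. */
static inline uint8_t avg4_ref(uint8_t a, uint8_t b, uint8_t c, uint8_t d)
{
    return (uint8_t) ((a + b + c + d + 2) >> 2);   /* one byte lane of avg4() */
}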
static int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int result = 0;

    if ((size_t) pix2 & 0x7) {
        do {                            /* pix2 unaligned: unaligned load */
            uint64_t p1 = ldq(pix1);
            uint64_t p2 = uldq(pix2);
            result += perr(p1, p2);     /* SAD of one 8-pixel row */
            pix1 += line_size;
            pix2 += line_size;
        } while (--h);
    } else {
        do {                            /* pix2 aligned: plain loads */
            uint64_t p1 = ldq(pix1);
            uint64_t p2 = ldq(pix2);
            result += perr(p1, p2);
            pix1 += line_size;
            pix2 += line_size;
        } while (--h);
    }
    return result;
}
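/* Illustrative reference (assumed helper, not part of the original file):
 * perr() is the Alpha MVI "pixel error" operation, i.e. the sum of absolute
 * differences of the eight bytes packed in its two operands, so the routine
 * above is an 8-wide, h-tall SAD.  A plain-C equivalent of one perr() call: */
static inline int sad8_row_ref(const uint8_t *pix1, const uint8_t *pix2)
{
    int sum = 0;
    for (int i = 0; i < 8; i++)
        sum += pix1[i] > pix2[i] ? pix1[i] - pix2[i] : pix2[i] - pix1[i];
    return sum;
}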
/* Plain-C 16x16 SAD; FFmpeg normally uses the assembly version,
 * pix_abs16x16_mvi_asm(), for this case. */
int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int result = 0;
    int h = 16;

    if ((size_t) pix2 & 0x7) {
        do {                            /* pix2 unaligned */
            uint64_t p1_l, p1_r, p2_l, p2_r;

            p1_l = ldq(pix1);
            p1_r = ldq(pix1 + 8);
            /* ... p2_l / p2_r: the 16 pix2 bytes, merged from unaligned
               quadwords (not preserved in this listing) ... */
            pix1 += line_size;
            pix2 += line_size;

            result += perr(p1_l, p2_l)
                    + perr(p1_r, p2_r);
        } while (--h);
    } else {
        do {                            /* pix2 aligned */
            uint64_t p1_l, p1_r, p2_l, p2_r;

            p1_l = ldq(pix1);
            p1_r = ldq(pix1 + 8);
            p2_l = ldq(pix2);
            p2_r = ldq(pix2 + 8);
            pix1 += line_size;
            pix2 += line_size;

            result += perr(p1_l, p2_l)
                    + perr(p1_r, p2_r);
        } while (--h);
    }

    return result;
}
static int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int result = 0;
    uint64_t disalign = (size_t) pix2 & 0x7;

    switch (disalign) {
    case 0:                             /* pix2 is 8-byte aligned */
        do {
            uint64_t p1_l, p1_r, p2_l, p2_r;
            uint64_t l, r;

            p1_l = ldq(pix1);
            p1_r = ldq(pix1 + 8);
            l    = ldq(pix2);
            r    = ldq(pix2 + 8);
            /* average every byte with its right-hand neighbour */
            p2_l = avg2(l, (l >> 8) | ((uint64_t) r << 56));
            p2_r = avg2(r, (r >> 8) | ((uint64_t) pix2[16] << 56));
            pix1 += line_size;
            pix2 += line_size;

            result += perr(p1_l, p2_l)
                    + perr(p1_r, p2_r);
        } while (--h);
        break;
    case 7:                             /* pix2 + 1 is 8-byte aligned */
        do {
            uint64_t p1_l, p1_r, p2_l, p2_r;
            uint64_t l, m, r;

            p1_l = ldq(pix1);
            p1_r = ldq(pix1 + 8);
            l    = ldq_u(pix2);
            m    = ldq_u(pix2 + 8);
            r    = ldq_u(pix2 + 16);
            /* ... p2_l / p2_r: avg2() of pix2 and pix2 + 1, reassembled
               from l, m and r (not preserved in this listing) ... */
            pix1 += line_size;
            pix2 += line_size;

            result += perr(p1_l, p2_l)
                    + perr(p1_r, p2_r);
        } while (--h);
        break;
    default:                            /* general misalignment */
        do {
            uint64_t disalign1 = disalign + 1;
            uint64_t p1_l, p1_r, p2_l, p2_r;
            uint64_t l, m, r;

            p1_l = ldq(pix1);
            p1_r = ldq(pix1 + 8);
            l    = ldq_u(pix2);
            m    = ldq_u(pix2 + 8);
            r    = ldq_u(pix2 + 16);
            /* ... p2_l / p2_r: avg2() of the bytes extracted at offsets
               disalign and disalign1 (not preserved in this listing) ... */
            pix1 += line_size;
            pix2 += line_size;

            result += perr(p1_l, p2_l)
                    + perr(p1_r, p2_r);
        } while (--h);
    }
    return result;
}
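/* Illustrative reference (assumed helper, not part of the original file):
 * pix_abs16x16_x2_mvi() is the SAD against pix2 interpolated half a pixel
 * to the right, i.e. each reference byte is the rounded average of
 * pix2[i] and pix2[i + 1].  One row of a plain-C equivalent: */
static inline int sad16_x2_row_ref(const uint8_t *pix1, const uint8_t *pix2)
{
    int sum = 0;
    for (int i = 0; i < 16; i++) {
        int ref = (pix2[i] + pix2[i + 1] + 1) >> 1;     /* avg2 per byte */
        sum += pix1[i] > ref ? pix1[i] - ref : ref - pix1[i];
    }
    return sum;
}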
static int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int result = 0;

    if ((size_t) pix2 & 0x7) {              /* pix2 unaligned */
        uint64_t t, p2_l, p2_r;
        /* ... preload the first pix2 row into p2_l / p2_r from unaligned
           quadwords, via the shared temporary t (not preserved in this
           listing) ... */

        do {
            uint64_t p1_l, p1_r, np2_l, np2_r;

            p1_l  = ldq(pix1);
            p1_r  = ldq(pix1 + 8);
            pix2 += line_size;
            /* ... load the next pix2 row into np2_l / np2_r the same way
               (not preserved in this listing) ... */

            result += perr(p1_l, avg2(p2_l, np2_l))
                    + perr(p1_r, avg2(p2_r, np2_r));

            pix1 += line_size;
            p2_l  = np2_l;
            p2_r  = np2_r;
        } while (--h);
    } else {                                /* pix2 aligned */
        uint64_t p2_l = ldq(pix2);
        uint64_t p2_r = ldq(pix2 + 8);

        do {
            uint64_t p1_l, p1_r, np2_l, np2_r;

            p1_l  = ldq(pix1);
            p1_r  = ldq(pix1 + 8);
            pix2 += line_size;
            np2_l = ldq(pix2);
            np2_r = ldq(pix2 + 8);

            result += perr(p1_l, avg2(p2_l, np2_l))
                    + perr(p1_r, avg2(p2_r, np2_r));

            pix1 += line_size;
            p2_l  = np2_l;
            p2_r  = np2_r;
        } while (--h);
    }

    return result;
}
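/* Illustrative reference (assumed helper, not part of the original file):
 * pix_abs16x16_y2_mvi() is the SAD against pix2 interpolated half a pixel
 * downwards: each reference byte is the rounded average of pix2[i] and
 * pix2[i + line_size].  Keeping the previous row in p2_l / p2_r above means
 * every pix2 row is loaded only once.  One row of a plain-C equivalent: */
static inline int sad16_y2_row_ref(const uint8_t *pix1, const uint8_t *pix2,
                                   int line_size)
{
    int sum = 0;
    for (int i = 0; i < 16; i++) {
        int ref = (pix2[i] + pix2[i + line_size] + 1) >> 1;
        sum += pix1[i] > ref ? pix1[i] - ref : ref - pix1[i];
    }
    return sum;
}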
static int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int result = 0;

    uint64_t p1_l, p1_r;
    uint64_t p2_l, p2_r, p2_x;

    p1_l = ldq(pix1);
    p1_r = ldq(pix1 + 8);

    if ((size_t) pix2 & 0x7) {              /* pix2 unaligned */
        p2_l = uldq(pix2);
        p2_r = uldq(pix2 + 8);
        p2_x = (uint64_t) pix2[16] << 56;
    } else {
        p2_l = ldq(pix2);
        p2_r = ldq(pix2 + 8);
        p2_x = ldq(pix2 + 16) << 56;
    }

    do {
        uint64_t np1_l, np1_r;
        uint64_t np2_l, np2_r, np2_x;

        pix1 += line_size;
        pix2 += line_size;

        np1_l = ldq(pix1);
        np1_r = ldq(pix1 + 8);

        if ((size_t) pix2 & 0x7) {          /* pix2 unaligned */
            np2_l = uldq(pix2);
            np2_r = uldq(pix2 + 8);
            np2_x = (uint64_t) pix2[16] << 56;
        } else {
            np2_l = ldq(pix2);
            np2_r = ldq(pix2 + 8);
            np2_x = ldq(pix2 + 16) << 56;
        }

        /* reference = avg4() of the current and next pix2 rows, each taken
           at offsets 0 and 1: the two-dimensional half-pel interpolation */
        result += perr(p1_l,
                       avg4( p2_l, ( p2_l >> 8) | ((uint64_t)  p2_r << 56),
                            np2_l, (np2_l >> 8) | ((uint64_t) np2_r << 56)))
                + perr(p1_r,
                       avg4( p2_r, ( p2_r >> 8) | ((uint64_t)  p2_x),
                            np2_r, (np2_r >> 8) | ((uint64_t) np2_x)));

        p1_l = np1_l;
        p1_r = np1_r;
        p2_l = np2_l;
        p2_r = np2_r;
        p2_x = np2_x;
    } while (--h);

    return result;
}
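/* Illustrative reference (assumed helper, not part of the original file):
 * pix_abs16x16_xy2_mvi() is the SAD against pix2 interpolated half a pixel
 * both right and down, so each reference byte is the rounded average of a
 * 2x2 neighbourhood of pix2.  One row of a plain-C equivalent: */
static inline int sad16_xy2_row_ref(const uint8_t *pix1, const uint8_t *pix2,
                                    int line_size)
{
    int sum = 0;
    for (int i = 0; i < 16; i++) {
        int ref = (pix2[i]             + pix2[i + 1] +
                   pix2[i + line_size] + pix2[i + line_size + 1] + 2) >> 2;
        sum += pix1[i] > ref ? pix1[i] - ref : ref - pix1[i];
    }
    return sum;
}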
/* Helpers and entry points referenced in this listing:
 *
 *   static uint64_t BYTE_VEC(uint64_t x);
 *       replicates the byte value x across all eight byte lanes
 *       (definition not preserved in this listing)
 *
 *   int        pix_abs16x16_mvi_asm(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
 *   static int pix_abs8x8_mvi      (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
 *   static int pix_abs16x16_x2_mvi (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
 *   static int pix_abs16x16_y2_mvi (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
 *   static int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
 *
 * The SAD routines are installed into the MECmpContext sad[] and
 * pix_abs[2][4] tables by ff_me_cmp_init_alpha(MECmpContext *c,
 * AVCodecContext *avctx).
 */
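/* Sketch (assumed; the exact table slots are not shown in this listing) of
 * how the routines are registered when the CPU supports the MVI extension.
 * amask() returns 0 for feature bits that are present. */
av_cold void ff_me_cmp_init_alpha(MECmpContext *c, AVCodecContext *avctx)
{
    if (amask(AMASK_MVI) == 0) {
        c->sad[0]        = pix_abs16x16_mvi_asm;
        c->sad[1]        = pix_abs8x8_mvi;
        c->pix_abs[0][0] = pix_abs16x16_mvi_asm;   /* full-pel 16x16 */
        c->pix_abs[0][1] = pix_abs16x16_x2_mvi;    /* half-pel x */
        c->pix_abs[0][2] = pix_abs16x16_y2_mvi;    /* half-pel y */
        c->pix_abs[0][3] = pix_abs16x16_xy2_mvi;   /* half-pel x+y */
        c->pix_abs[1][0] = pix_abs8x8_mvi;         /* full-pel 8x8 */
    }
}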