FFmpeg
me_cmp.c
Go to the documentation of this file.
1 /*
2  * DSP utils
3  * Copyright (c) 2000, 2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 #include "libavutil/attributes.h"
24 #include "libavutil/internal.h"
25 #include "libavutil/mem_internal.h"
26 #include "avcodec.h"
27 #include "copy_block.h"
28 #include "simple_idct.h"
29 #include "me_cmp.h"
30 #include "mpegvideoenc.h"
31 #include "config.h"
32 #include "config_components.h"
33 
/* ff_square_tab[i] = (i - 256) * (i - 256).
 * Indexed with a +256 bias (see the sse*_c functions below) so that a
 * pixel difference in [-255, 255] can be squared with a single lookup. */
const uint32_t ff_square_tab[512] = {
    65536, 65025, 64516, 64009, 63504, 63001, 62500, 62001, 61504, 61009, 60516, 60025, 59536, 59049, 58564, 58081,
    57600, 57121, 56644, 56169, 55696, 55225, 54756, 54289, 53824, 53361, 52900, 52441, 51984, 51529, 51076, 50625,
    50176, 49729, 49284, 48841, 48400, 47961, 47524, 47089, 46656, 46225, 45796, 45369, 44944, 44521, 44100, 43681,
    43264, 42849, 42436, 42025, 41616, 41209, 40804, 40401, 40000, 39601, 39204, 38809, 38416, 38025, 37636, 37249,
    36864, 36481, 36100, 35721, 35344, 34969, 34596, 34225, 33856, 33489, 33124, 32761, 32400, 32041, 31684, 31329,
    30976, 30625, 30276, 29929, 29584, 29241, 28900, 28561, 28224, 27889, 27556, 27225, 26896, 26569, 26244, 25921,
    25600, 25281, 24964, 24649, 24336, 24025, 23716, 23409, 23104, 22801, 22500, 22201, 21904, 21609, 21316, 21025,
    20736, 20449, 20164, 19881, 19600, 19321, 19044, 18769, 18496, 18225, 17956, 17689, 17424, 17161, 16900, 16641,
    16384, 16129, 15876, 15625, 15376, 15129, 14884, 14641, 14400, 14161, 13924, 13689, 13456, 13225, 12996, 12769,
    12544, 12321, 12100, 11881, 11664, 11449, 11236, 11025, 10816, 10609, 10404, 10201, 10000,  9801,  9604,  9409,
     9216,  9025,  8836,  8649,  8464,  8281,  8100,  7921,  7744,  7569,  7396,  7225,  7056,  6889,  6724,  6561,
     6400,  6241,  6084,  5929,  5776,  5625,  5476,  5329,  5184,  5041,  4900,  4761,  4624,  4489,  4356,  4225,
     4096,  3969,  3844,  3721,  3600,  3481,  3364,  3249,  3136,  3025,  2916,  2809,  2704,  2601,  2500,  2401,
     2304,  2209,  2116,  2025,  1936,  1849,  1764,  1681,  1600,  1521,  1444,  1369,  1296,  1225,  1156,  1089,
     1024,   961,   900,   841,   784,   729,   676,   625,   576,   529,   484,   441,   400,   361,   324,   289,
      256,   225,   196,   169,   144,   121,   100,    81,    64,    49,    36,    25,    16,     9,     4,     1,
        0,     1,     4,     9,    16,    25,    36,    49,    64,    81,   100,   121,   144,   169,   196,   225,
      256,   289,   324,   361,   400,   441,   484,   529,   576,   625,   676,   729,   784,   841,   900,   961,
     1024,  1089,  1156,  1225,  1296,  1369,  1444,  1521,  1600,  1681,  1764,  1849,  1936,  2025,  2116,  2209,
     2304,  2401,  2500,  2601,  2704,  2809,  2916,  3025,  3136,  3249,  3364,  3481,  3600,  3721,  3844,  3969,
     4096,  4225,  4356,  4489,  4624,  4761,  4900,  5041,  5184,  5329,  5476,  5625,  5776,  5929,  6084,  6241,
     6400,  6561,  6724,  6889,  7056,  7225,  7396,  7569,  7744,  7921,  8100,  8281,  8464,  8649,  8836,  9025,
     9216,  9409,  9604,  9801, 10000, 10201, 10404, 10609, 10816, 11025, 11236, 11449, 11664, 11881, 12100, 12321,
    12544, 12769, 12996, 13225, 13456, 13689, 13924, 14161, 14400, 14641, 14884, 15129, 15376, 15625, 15876, 16129,
    16384, 16641, 16900, 17161, 17424, 17689, 17956, 18225, 18496, 18769, 19044, 19321, 19600, 19881, 20164, 20449,
    20736, 21025, 21316, 21609, 21904, 22201, 22500, 22801, 23104, 23409, 23716, 24025, 24336, 24649, 24964, 25281,
    25600, 25921, 26244, 26569, 26896, 27225, 27556, 27889, 28224, 28561, 28900, 29241, 29584, 29929, 30276, 30625,
    30976, 31329, 31684, 32041, 32400, 32761, 33124, 33489, 33856, 34225, 34596, 34969, 35344, 35721, 36100, 36481,
    36864, 37249, 37636, 38025, 38416, 38809, 39204, 39601, 40000, 40401, 40804, 41209, 41616, 42025, 42436, 42849,
    43264, 43681, 44100, 44521, 44944, 45369, 45796, 46225, 46656, 47089, 47524, 47961, 48400, 48841, 49284, 49729,
    50176, 50625, 51076, 51529, 51984, 52441, 52900, 53361, 53824, 54289, 54756, 55225, 55696, 56169, 56644, 57121,
    57600, 58081, 58564, 59049, 59536, 60025, 60516, 61009, 61504, 62001, 62500, 63001, 63504, 64009, 64516, 65025,
};
69 
70 static int sse4_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
71  ptrdiff_t stride, int h)
72 {
73  int s = 0, i;
74  const uint32_t *sq = ff_square_tab + 256;
75 
76  for (i = 0; i < h; i++) {
77  s += sq[pix1[0] - pix2[0]];
78  s += sq[pix1[1] - pix2[1]];
79  s += sq[pix1[2] - pix2[2]];
80  s += sq[pix1[3] - pix2[3]];
81  pix1 += stride;
82  pix2 += stride;
83  }
84  return s;
85 }
86 
87 static int sse8_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
88  ptrdiff_t stride, int h)
89 {
90  int s = 0, i;
91  const uint32_t *sq = ff_square_tab + 256;
92 
93  for (i = 0; i < h; i++) {
94  s += sq[pix1[0] - pix2[0]];
95  s += sq[pix1[1] - pix2[1]];
96  s += sq[pix1[2] - pix2[2]];
97  s += sq[pix1[3] - pix2[3]];
98  s += sq[pix1[4] - pix2[4]];
99  s += sq[pix1[5] - pix2[5]];
100  s += sq[pix1[6] - pix2[6]];
101  s += sq[pix1[7] - pix2[7]];
102  pix1 += stride;
103  pix2 += stride;
104  }
105  return s;
106 }
107 
108 static int sse16_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
109  ptrdiff_t stride, int h)
110 {
111  int s = 0, i;
112  const uint32_t *sq = ff_square_tab + 256;
113 
114  for (i = 0; i < h; i++) {
115  s += sq[pix1[0] - pix2[0]];
116  s += sq[pix1[1] - pix2[1]];
117  s += sq[pix1[2] - pix2[2]];
118  s += sq[pix1[3] - pix2[3]];
119  s += sq[pix1[4] - pix2[4]];
120  s += sq[pix1[5] - pix2[5]];
121  s += sq[pix1[6] - pix2[6]];
122  s += sq[pix1[7] - pix2[7]];
123  s += sq[pix1[8] - pix2[8]];
124  s += sq[pix1[9] - pix2[9]];
125  s += sq[pix1[10] - pix2[10]];
126  s += sq[pix1[11] - pix2[11]];
127  s += sq[pix1[12] - pix2[12]];
128  s += sq[pix1[13] - pix2[13]];
129  s += sq[pix1[14] - pix2[14]];
130  s += sq[pix1[15] - pix2[15]];
131 
132  pix1 += stride;
133  pix2 += stride;
134  }
135  return s;
136 }
137 
/** Sum of the absolute values of all 64 coefficients of an 8x8 DCT block. */
static int sum_abs_dctelem_c(const int16_t *block)
{
    int total = 0;

    for (int i = 0; i < 64; i++)
        total += abs(block[i]);
    return total;
}
146 
/* Rounded 2- and 4-sample averages used by the half-pel SAD functions below. */
#define avg2(a, b) (((a) + (b) + 1) >> 1)
#define avg4(a, b, c, d) (((a) + (b) + (c) + (d) + 2) >> 2)
149 
150 static inline int pix_abs16_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
151  ptrdiff_t stride, int h)
152 {
153  int s = 0, i;
154 
155  for (i = 0; i < h; i++) {
156  s += abs(pix1[0] - pix2[0]);
157  s += abs(pix1[1] - pix2[1]);
158  s += abs(pix1[2] - pix2[2]);
159  s += abs(pix1[3] - pix2[3]);
160  s += abs(pix1[4] - pix2[4]);
161  s += abs(pix1[5] - pix2[5]);
162  s += abs(pix1[6] - pix2[6]);
163  s += abs(pix1[7] - pix2[7]);
164  s += abs(pix1[8] - pix2[8]);
165  s += abs(pix1[9] - pix2[9]);
166  s += abs(pix1[10] - pix2[10]);
167  s += abs(pix1[11] - pix2[11]);
168  s += abs(pix1[12] - pix2[12]);
169  s += abs(pix1[13] - pix2[13]);
170  s += abs(pix1[14] - pix2[14]);
171  s += abs(pix1[15] - pix2[15]);
172  pix1 += stride;
173  pix2 += stride;
174  }
175  return s;
176 }
177 
178 static inline int pix_median_abs16_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
179  ptrdiff_t stride, int h)
180 {
181  int s = 0, i, j;
182 
183 #define V(x) (pix1[x] - pix2[x])
184 
185  s += abs(V(0));
186  s += abs(V(1) - V(0));
187  s += abs(V(2) - V(1));
188  s += abs(V(3) - V(2));
189  s += abs(V(4) - V(3));
190  s += abs(V(5) - V(4));
191  s += abs(V(6) - V(5));
192  s += abs(V(7) - V(6));
193  s += abs(V(8) - V(7));
194  s += abs(V(9) - V(8));
195  s += abs(V(10) - V(9));
196  s += abs(V(11) - V(10));
197  s += abs(V(12) - V(11));
198  s += abs(V(13) - V(12));
199  s += abs(V(14) - V(13));
200  s += abs(V(15) - V(14));
201 
202  pix1 += stride;
203  pix2 += stride;
204 
205  for (i = 1; i < h; i++) {
206  s += abs(V(0) - V(-stride));
207  for (j = 1; j < 16; j++)
208  s += abs(V(j) - mid_pred(V(j-stride), V(j-1), V(j-stride) + V(j-1) - V(j-stride-1)));
209  pix1 += stride;
210  pix2 += stride;
211 
212  }
213 #undef V
214  return s;
215 }
216 
217 static int pix_abs16_x2_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
218  ptrdiff_t stride, int h)
219 {
220  int s = 0, i;
221 
222  for (i = 0; i < h; i++) {
223  s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
224  s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
225  s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
226  s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
227  s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
228  s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
229  s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
230  s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
231  s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
232  s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
233  s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
234  s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
235  s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
236  s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
237  s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
238  s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
239  pix1 += stride;
240  pix2 += stride;
241  }
242  return s;
243 }
244 
245 static int pix_abs16_y2_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
246  ptrdiff_t stride, int h)
247 {
248  int s = 0, i;
249  const uint8_t *pix3 = pix2 + stride;
250 
251  for (i = 0; i < h; i++) {
252  s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
253  s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
254  s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
255  s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
256  s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
257  s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
258  s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
259  s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
260  s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
261  s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
262  s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
263  s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
264  s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
265  s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
266  s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
267  s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
268  pix1 += stride;
269  pix2 += stride;
270  pix3 += stride;
271  }
272  return s;
273 }
274 
275 static int pix_abs16_xy2_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
276  ptrdiff_t stride, int h)
277 {
278  int s = 0, i;
279  const uint8_t *pix3 = pix2 + stride;
280 
281  for (i = 0; i < h; i++) {
282  s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
283  s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
284  s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
285  s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
286  s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
287  s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
288  s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
289  s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
290  s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
291  s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
292  s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
293  s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
294  s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
295  s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
296  s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
297  s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
298  pix1 += stride;
299  pix2 += stride;
300  pix3 += stride;
301  }
302  return s;
303 }
304 
305 static inline int pix_abs8_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
306  ptrdiff_t stride, int h)
307 {
308  int s = 0, i;
309 
310  for (i = 0; i < h; i++) {
311  s += abs(pix1[0] - pix2[0]);
312  s += abs(pix1[1] - pix2[1]);
313  s += abs(pix1[2] - pix2[2]);
314  s += abs(pix1[3] - pix2[3]);
315  s += abs(pix1[4] - pix2[4]);
316  s += abs(pix1[5] - pix2[5]);
317  s += abs(pix1[6] - pix2[6]);
318  s += abs(pix1[7] - pix2[7]);
319  pix1 += stride;
320  pix2 += stride;
321  }
322  return s;
323 }
324 
325 static inline int pix_median_abs8_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
326  ptrdiff_t stride, int h)
327 {
328  int s = 0, i, j;
329 
330 #define V(x) (pix1[x] - pix2[x])
331 
332  s += abs(V(0));
333  s += abs(V(1) - V(0));
334  s += abs(V(2) - V(1));
335  s += abs(V(3) - V(2));
336  s += abs(V(4) - V(3));
337  s += abs(V(5) - V(4));
338  s += abs(V(6) - V(5));
339  s += abs(V(7) - V(6));
340 
341  pix1 += stride;
342  pix2 += stride;
343 
344  for (i = 1; i < h; i++) {
345  s += abs(V(0) - V(-stride));
346  for (j = 1; j < 8; j++)
347  s += abs(V(j) - mid_pred(V(j-stride), V(j-1), V(j-stride) + V(j-1) - V(j-stride-1)));
348  pix1 += stride;
349  pix2 += stride;
350 
351  }
352 #undef V
353  return s;
354 }
355 
356 static int pix_abs8_x2_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
357  ptrdiff_t stride, int h)
358 {
359  int s = 0, i;
360 
361  for (i = 0; i < h; i++) {
362  s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
363  s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
364  s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
365  s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
366  s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
367  s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
368  s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
369  s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
370  pix1 += stride;
371  pix2 += stride;
372  }
373  return s;
374 }
375 
376 static int pix_abs8_y2_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
377  ptrdiff_t stride, int h)
378 {
379  int s = 0, i;
380  const uint8_t *pix3 = pix2 + stride;
381 
382  for (i = 0; i < h; i++) {
383  s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
384  s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
385  s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
386  s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
387  s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
388  s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
389  s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
390  s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
391  pix1 += stride;
392  pix2 += stride;
393  pix3 += stride;
394  }
395  return s;
396 }
397 
398 static int pix_abs8_xy2_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
399  ptrdiff_t stride, int h)
400 {
401  int s = 0, i;
402  const uint8_t *pix3 = pix2 + stride;
403 
404  for (i = 0; i < h; i++) {
405  s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
406  s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
407  s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
408  s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
409  s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
410  s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
411  s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
412  s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
413  pix1 += stride;
414  pix2 += stride;
415  pix3 += stride;
416  }
417  return s;
418 }
419 
420 static int nsse16_c(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2,
421  ptrdiff_t stride, int h)
422 {
423  int score1 = 0, score2 = 0, x, y;
424 
425  for (y = 0; y < h; y++) {
426  for (x = 0; x < 16; x++)
427  score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
428  if (y + 1 < h) {
429  for (x = 0; x < 15; x++)
430  score2 += FFABS(s1[x] - s1[x + stride] -
431  s1[x + 1] + s1[x + stride + 1]) -
432  FFABS(s2[x] - s2[x + stride] -
433  s2[x + 1] + s2[x + stride + 1]);
434  }
435  s1 += stride;
436  s2 += stride;
437  }
438 
439  if (c)
440  return score1 + FFABS(score2) * c->avctx->nsse_weight;
441  else
442  return score1 + FFABS(score2) * 8;
443 }
444 
445 static int nsse8_c(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2,
446  ptrdiff_t stride, int h)
447 {
448  int score1 = 0, score2 = 0, x, y;
449 
450  for (y = 0; y < h; y++) {
451  for (x = 0; x < 8; x++)
452  score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
453  if (y + 1 < h) {
454  for (x = 0; x < 7; x++)
455  score2 += FFABS(s1[x] - s1[x + stride] -
456  s1[x + 1] + s1[x + stride + 1]) -
457  FFABS(s2[x] - s2[x + stride] -
458  s2[x + 1] + s2[x + stride + 1]);
459  }
460  s1 += stride;
461  s2 += stride;
462  }
463 
464  if (c)
465  return score1 + FFABS(score2) * c->avctx->nsse_weight;
466  else
467  return score1 + FFABS(score2) * 8;
468 }
469 
/**
 * No-op comparison that always scores 0; selected for FF_CMP_ZERO so every
 * candidate is rated equally.
 */
static int zero_cmp(MpegEncContext *s, const uint8_t *a, const uint8_t *b,
                    ptrdiff_t stride, int h)
{
    return 0;
}
475 
477 {
478  int i;
479 
480  memset(cmp, 0, sizeof(void *) * 6);
481 
482  for (i = 0; i < 6; i++) {
483  switch (type & 0xFF) {
484  case FF_CMP_SAD:
485  cmp[i] = c->sad[i];
486  break;
487  case FF_CMP_MEDIAN_SAD:
488  cmp[i] = c->median_sad[i];
489  break;
490  case FF_CMP_SATD:
491  cmp[i] = c->hadamard8_diff[i];
492  break;
493  case FF_CMP_SSE:
494  cmp[i] = c->sse[i];
495  break;
496  case FF_CMP_DCT:
497  cmp[i] = c->dct_sad[i];
498  break;
499  case FF_CMP_DCT264:
500  cmp[i] = c->dct264_sad[i];
501  break;
502  case FF_CMP_DCTMAX:
503  cmp[i] = c->dct_max[i];
504  break;
505  case FF_CMP_PSNR:
506  cmp[i] = c->quant_psnr[i];
507  break;
508  case FF_CMP_BIT:
509  cmp[i] = c->bit[i];
510  break;
511  case FF_CMP_RD:
512  cmp[i] = c->rd[i];
513  break;
514  case FF_CMP_VSAD:
515  cmp[i] = c->vsad[i];
516  break;
517  case FF_CMP_VSSE:
518  cmp[i] = c->vsse[i];
519  break;
520  case FF_CMP_ZERO:
521  cmp[i] = zero_cmp;
522  break;
523  case FF_CMP_NSSE:
524  cmp[i] = c->nsse[i];
525  break;
526 #if CONFIG_DWT
527  case FF_CMP_W53:
528  cmp[i]= c->w53[i];
529  break;
530  case FF_CMP_W97:
531  cmp[i]= c->w97[i];
532  break;
533 #endif
534  default:
536  "internal error in cmp function selection\n");
537  }
538  }
539 }
540 
/* 2-point butterfly writing sum/difference to separate outputs. */
#define BUTTERFLY2(o1, o2, i1, i2) \
    o1 = (i1) + (i2); \
    o2 = (i1) - (i2);

/* In-place 2-point butterfly: (x, y) <- (x + y, x - y). */
#define BUTTERFLY1(x, y) \
    { \
        int a, b; \
        a = x; \
        b = y; \
        x = a + b; \
        y = a - b; \
    }

/* Final butterfly stage folded directly into absolute-value accumulation. */
#define BUTTERFLYA(x, y) (FFABS((x) + (y)) + FFABS((x) - (y)))
555 
/**
 * SATD: 8x8 Hadamard transform of the difference between src and dst,
 * returning the sum of absolute transform coefficients.
 * Rows are transformed in place in temp[]; for the columns, the last
 * butterfly stage is merged into the accumulation via BUTTERFLYA.
 */
static int hadamard8_diff8x8_c(MpegEncContext *s, const uint8_t *dst,
                               const uint8_t *src, ptrdiff_t stride, int h)
{
    int i, temp[64], sum = 0;

    /* Horizontal (row) transform of the difference signal. */
    for (i = 0; i < 8; i++) {
        // FIXME: try pointer walks
        BUTTERFLY2(temp[8 * i + 0], temp[8 * i + 1],
                   src[stride * i + 0] - dst[stride * i + 0],
                   src[stride * i + 1] - dst[stride * i + 1]);
        BUTTERFLY2(temp[8 * i + 2], temp[8 * i + 3],
                   src[stride * i + 2] - dst[stride * i + 2],
                   src[stride * i + 3] - dst[stride * i + 3]);
        BUTTERFLY2(temp[8 * i + 4], temp[8 * i + 5],
                   src[stride * i + 4] - dst[stride * i + 4],
                   src[stride * i + 5] - dst[stride * i + 5]);
        BUTTERFLY2(temp[8 * i + 6], temp[8 * i + 7],
                   src[stride * i + 6] - dst[stride * i + 6],
                   src[stride * i + 7] - dst[stride * i + 7]);

        BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 2]);
        BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 3]);
        BUTTERFLY1(temp[8 * i + 4], temp[8 * i + 6]);
        BUTTERFLY1(temp[8 * i + 5], temp[8 * i + 7]);

        BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 4]);
        BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 5]);
        BUTTERFLY1(temp[8 * i + 2], temp[8 * i + 6]);
        BUTTERFLY1(temp[8 * i + 3], temp[8 * i + 7]);
    }

    /* Vertical (column) transform, accumulating |coefficient|. */
    for (i = 0; i < 8; i++) {
        BUTTERFLY1(temp[8 * 0 + i], temp[8 * 1 + i]);
        BUTTERFLY1(temp[8 * 2 + i], temp[8 * 3 + i]);
        BUTTERFLY1(temp[8 * 4 + i], temp[8 * 5 + i]);
        BUTTERFLY1(temp[8 * 6 + i], temp[8 * 7 + i]);

        BUTTERFLY1(temp[8 * 0 + i], temp[8 * 2 + i]);
        BUTTERFLY1(temp[8 * 1 + i], temp[8 * 3 + i]);
        BUTTERFLY1(temp[8 * 4 + i], temp[8 * 6 + i]);
        BUTTERFLY1(temp[8 * 5 + i], temp[8 * 7 + i]);

        sum += BUTTERFLYA(temp[8 * 0 + i], temp[8 * 4 + i]) +
               BUTTERFLYA(temp[8 * 1 + i], temp[8 * 5 + i]) +
               BUTTERFLYA(temp[8 * 2 + i], temp[8 * 6 + i]) +
               BUTTERFLYA(temp[8 * 3 + i], temp[8 * 7 + i]);
    }
    return sum;
}
605 
/**
 * Intra SATD: 8x8 Hadamard transform of the source block itself (second
 * argument is unused), with the DC term subtracted at the end so that a
 * constant brightness offset does not affect the score.
 */
static int hadamard8_intra8x8_c(MpegEncContext *s, const uint8_t *src,
                                const uint8_t *dummy, ptrdiff_t stride, int h)
{
    int i, temp[64], sum = 0;

    /* Horizontal (row) transform of the source samples. */
    for (i = 0; i < 8; i++) {
        // FIXME: try pointer walks
        BUTTERFLY2(temp[8 * i + 0], temp[8 * i + 1],
                   src[stride * i + 0], src[stride * i + 1]);
        BUTTERFLY2(temp[8 * i + 2], temp[8 * i + 3],
                   src[stride * i + 2], src[stride * i + 3]);
        BUTTERFLY2(temp[8 * i + 4], temp[8 * i + 5],
                   src[stride * i + 4], src[stride * i + 5]);
        BUTTERFLY2(temp[8 * i + 6], temp[8 * i + 7],
                   src[stride * i + 6], src[stride * i + 7]);

        BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 2]);
        BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 3]);
        BUTTERFLY1(temp[8 * i + 4], temp[8 * i + 6]);
        BUTTERFLY1(temp[8 * i + 5], temp[8 * i + 7]);

        BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 4]);
        BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 5]);
        BUTTERFLY1(temp[8 * i + 2], temp[8 * i + 6]);
        BUTTERFLY1(temp[8 * i + 3], temp[8 * i + 7]);
    }

    /* Vertical (column) transform, accumulating |coefficient|. */
    for (i = 0; i < 8; i++) {
        BUTTERFLY1(temp[8 * 0 + i], temp[8 * 1 + i]);
        BUTTERFLY1(temp[8 * 2 + i], temp[8 * 3 + i]);
        BUTTERFLY1(temp[8 * 4 + i], temp[8 * 5 + i]);
        BUTTERFLY1(temp[8 * 6 + i], temp[8 * 7 + i]);

        BUTTERFLY1(temp[8 * 0 + i], temp[8 * 2 + i]);
        BUTTERFLY1(temp[8 * 1 + i], temp[8 * 3 + i]);
        BUTTERFLY1(temp[8 * 4 + i], temp[8 * 6 + i]);
        BUTTERFLY1(temp[8 * 5 + i], temp[8 * 7 + i]);

        sum +=
            BUTTERFLYA(temp[8 * 0 + i], temp[8 * 4 + i])
            + BUTTERFLYA(temp[8 * 1 + i], temp[8 * 5 + i])
            + BUTTERFLYA(temp[8 * 2 + i], temp[8 * 6 + i])
            + BUTTERFLYA(temp[8 * 3 + i], temp[8 * 7 + i]);
    }

    /* Remove the DC coefficient's contribution (added in the i == 0 pass). */
    sum -= FFABS(temp[8 * 0] + temp[8 * 4]); // -mean

    return sum;
}
655 
/**
 * DCT-domain SAD of an 8x8 block: difference -> forward DCT -> sum of
 * absolute coefficients (via the mecc.sum_abs_dctelem hook).
 */
static int dct_sad8x8_c(MpegEncContext *s, const uint8_t *src1,
                        const uint8_t *src2, ptrdiff_t stride, int h)
{
    LOCAL_ALIGNED_16(int16_t, temp, [64]);

    s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride);
    s->fdsp.fdct(temp);
    return s->mecc.sum_abs_dctelem(temp);
}
665 
#if CONFIG_GPL
/*
 * One-dimensional 8-point integer transform in the butterfly/lifting form
 * used by H.264's 8x8 transform (>>1 and >>2 scalings).
 * SRC()/DST() are (re)defined around each use to select rows or columns.
 */
#define DCT8_1D \
    { \
        const int s07 = SRC(0) + SRC(7); \
        const int s16 = SRC(1) + SRC(6); \
        const int s25 = SRC(2) + SRC(5); \
        const int s34 = SRC(3) + SRC(4); \
        const int a0 = s07 + s34; \
        const int a1 = s16 + s25; \
        const int a2 = s07 - s34; \
        const int a3 = s16 - s25; \
        const int d07 = SRC(0) - SRC(7); \
        const int d16 = SRC(1) - SRC(6); \
        const int d25 = SRC(2) - SRC(5); \
        const int d34 = SRC(3) - SRC(4); \
        const int a4 = d16 + d25 + (d07 + (d07 >> 1)); \
        const int a5 = d07 - d34 - (d25 + (d25 >> 1)); \
        const int a6 = d07 + d34 - (d16 + (d16 >> 1)); \
        const int a7 = d16 - d25 + (d34 + (d34 >> 1)); \
        DST(0, a0 + a1); \
        DST(1, a4 + (a7 >> 2)); \
        DST(2, a2 + (a3 >> 1)); \
        DST(3, a5 + (a6 >> 2)); \
        DST(4, a0 - a1); \
        DST(5, a6 - (a5 >> 2)); \
        DST(6, (a2 >> 1) - a3); \
        DST(7, (a4 >> 2) - a7); \
    }

/**
 * SAD of the H.264-style 8x8 transform of the difference block:
 * rows are transformed in place, then the column pass accumulates
 * |coefficient| directly through the DST() macro.
 */
static int dct264_sad8x8_c(MpegEncContext *s, const uint8_t *src1,
                           const uint8_t *src2, ptrdiff_t stride, int h)
{
    int16_t dct[8][8];
    int i, sum = 0;

    s->pdsp.diff_pixels_unaligned(dct[0], src1, src2, stride);

    /* Row transform, in place. */
#define SRC(x) dct[i][x]
#define DST(x, v) dct[i][x] = v
    for (i = 0; i < 8; i++)
        DCT8_1D
#undef SRC
#undef DST

    /* Column transform; DST() folds the result into the accumulator. */
#define SRC(x) dct[x][i]
#define DST(x, v) sum += FFABS(v)
    for (i = 0; i < 8; i++)
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
#endif
719 
720 static int dct_max8x8_c(MpegEncContext *s, const uint8_t *src1,
721  const uint8_t *src2, ptrdiff_t stride, int h)
722 {
723  LOCAL_ALIGNED_16(int16_t, temp, [64]);
724  int sum = 0, i;
725 
726  s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride);
727  s->fdsp.fdct(temp);
728 
729  for (i = 0; i < 64; i++)
730  sum = FFMAX(sum, FFABS(temp[i]));
731 
732  return sum;
733 }
734 
/**
 * Quantization-error score: squared error between the difference block and
 * its quantize/dequantize round-trip.
 * NOTE(review): bak is saved before fast_dct_quantize, which presumably
 * applies the forward DCT internally, so the two operands live in different
 * domains — long-standing behavior, confirm before relying on it.
 */
static int quant_psnr8x8_c(MpegEncContext *s, const uint8_t *src1,
                           const uint8_t *src2, ptrdiff_t stride, int h)
{
    LOCAL_ALIGNED_16(int16_t, temp, [64 * 2]);
    int16_t *const bak = temp + 64;
    int sum = 0, i;

    s->mb_intra = 0;

    s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride);

    /* Keep a copy of the unprocessed coefficients to compare against. */
    memcpy(bak, temp, 64 * sizeof(int16_t));

    s->block_last_index[0 /* FIXME */] =
        s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);

    for (i = 0; i < 64; i++)
        sum += (temp[i] - bak[i]) * (temp[i] - bak[i]);

    return sum;
}
758 
/**
 * Rate-distortion score of an 8x8 block: quantize the DCT of the difference,
 * estimate the VLC bit cost of the coefficients, dequantize and reconstruct,
 * then combine SSE distortion with the weighted bit cost.
 */
static int rd8x8_c(MpegEncContext *s, const uint8_t *src1, const uint8_t *src2,
                   ptrdiff_t stride, int h)
{
    const uint8_t *scantable = s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(int16_t, temp, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
    int i, last, run, bits, level, distortion, start_i;
    const int esc_length = s->ac_esc_length;
    uint8_t *length, *last_length;

    /* Work on local aligned copies of both 8x8 blocks. */
    copy_block8(lsrc1, src1, 8, stride, 8);
    copy_block8(lsrc2, src2, 8, stride, 8);

    s->pdsp.diff_pixels(temp, lsrc1, lsrc2, 8);

    s->block_last_index[0 /* FIXME */] =
    last =
        s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i);

    bits = 0;

    /* Pick the VLC length tables for intra vs. inter coding. */
    if (s->mb_intra) {
        start_i = 1;
        length = s->intra_ac_vlc_length;
        last_length = s->intra_ac_vlc_last_length;
        bits += s->luma_dc_vlc_length[temp[0] + 256]; // FIXME: chroma
    } else {
        start_i = 0;
        length = s->inter_ac_vlc_length;
        last_length = s->inter_ac_vlc_last_length;
    }

    if (last >= start_i) {
        /* Estimate the bit cost of each (run, level) pair in scan order. */
        run = 0;
        for (i = start_i; i < last; i++) {
            int j = scantable[i];
            level = temp[j];

            if (level) {
                level += 64; /* bias so level fits the table index range */
                if ((level & (~127)) == 0) /* biased level in [0, 127]: table lookup */
                    bits += length[UNI_AC_ENC_INDEX(run, level)];
                else /* out of range: escape-coded */
                    bits += esc_length;
                run = 0;
            } else
                run++;
        }
        /* The last coefficient uses the dedicated "last" length table. */
        i = scantable[last];

        level = temp[i] + 64;

        av_assert2(level - 64);

        if ((level & (~127)) == 0) {
            bits += last_length[UNI_AC_ENC_INDEX(run, level)];
        } else
            bits += esc_length;
    }

    if (last >= 0) {
        if (s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    /* Reconstruct into lsrc2 and measure SSE against the original block. */
    s->idsp.idct_add(lsrc2, 8, temp);

    distortion = s->mecc.sse[1](NULL, lsrc2, lsrc1, 8, 8);

    return distortion + ((bits * s->qscale * s->qscale * 109 + 64) >> 7);
}
833 
/**
 * Bit-cost score of an 8x8 block: quantize the DCT of the difference and
 * return the estimated number of bits needed to code the coefficients
 * (same VLC cost model as rd8x8_c, without the distortion term).
 */
static int bit8x8_c(MpegEncContext *s, const uint8_t *src1, const uint8_t *src2,
                    ptrdiff_t stride, int h)
{
    const uint8_t *scantable = s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(int16_t, temp, [64]);
    int i, last, run, bits, level, start_i;
    const int esc_length = s->ac_esc_length;
    uint8_t *length, *last_length;

    s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride);

    s->block_last_index[0 /* FIXME */] =
    last =
        s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i);

    bits = 0;

    /* Pick the VLC length tables for intra vs. inter coding. */
    if (s->mb_intra) {
        start_i = 1;
        length = s->intra_ac_vlc_length;
        last_length = s->intra_ac_vlc_last_length;
        bits += s->luma_dc_vlc_length[temp[0] + 256]; // FIXME: chroma
    } else {
        start_i = 0;
        length = s->inter_ac_vlc_length;
        last_length = s->inter_ac_vlc_last_length;
    }

    if (last >= start_i) {
        /* Estimate the bit cost of each (run, level) pair in scan order. */
        run = 0;
        for (i = start_i; i < last; i++) {
            int j = scantable[i];
            level = temp[j];

            if (level) {
                level += 64; /* bias so level fits the table index range */
                if ((level & (~127)) == 0) /* biased level in [0, 127]: table lookup */
                    bits += length[UNI_AC_ENC_INDEX(run, level)];
                else /* out of range: escape-coded */
                    bits += esc_length;
                run = 0;
            } else
                run++;
        }
        /* The last coefficient uses the dedicated "last" length table. */
        i = scantable[last];

        level = temp[i] + 64;

        av_assert2(level - 64);

        if ((level & (~127)) == 0)
            bits += last_length[UNI_AC_ENC_INDEX(run, level)];
        else
            bits += esc_length;
    }

    return bits;
}
892 
/**
 * Vertical SAD of a single plane ("intra"): sum of absolute differences
 * between each row and the row above it, over a block `size` pixels wide.
 * The second pointer argument is unused.
 */
#define VSAD_INTRA(size) \
static int vsad_intra ## size ## _c(MpegEncContext *c, \
                                    const uint8_t *s, const uint8_t *dummy, \
                                    ptrdiff_t stride, int h) \
{ \
    int score = 0, x, y; \
    \
    for (y = 1; y < h; y++) { \
        for (x = 0; x < size; x += 4) { \
            score += FFABS(s[x] - s[x + stride]) + \
                     FFABS(s[x + 1] - s[x + stride + 1]) + \
                     FFABS(s[x + 2] - s[x + 2 + stride]) + \
                     FFABS(s[x + 3] - s[x + 3 + stride]); \
        } \
        s += stride; \
    } \
    \
    return score; \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
914 
/**
 * Vertical SAD of the difference signal s1 - s2: sum of absolute changes
 * of the difference between consecutive rows, `size` pixels wide.
 */
#define VSAD(size) \
static int vsad ## size ## _c(MpegEncContext *c, \
                              const uint8_t *s1, const uint8_t *s2, \
                              ptrdiff_t stride, int h) \
{ \
    int score = 0, x, y; \
    \
    for (y = 1; y < h; y++) { \
        for (x = 0; x < size; x++) \
            score += FFABS(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]); \
        s1 += stride; \
        s2 += stride; \
    } \
    \
    return score; \
}
VSAD(8)
VSAD(16)
933 
/* Square of a value; used by the vertical SSE variants below. */
#define SQ(a) ((a) * (a))

/**
 * Vertical SSE of a single plane ("intra"): sum of squared differences
 * between each row and the row above it, over a block `size` pixels wide.
 * The second pointer argument is unused.
 */
#define VSSE_INTRA(size) \
static int vsse_intra ## size ## _c(MpegEncContext *c, \
                                    const uint8_t *s, const uint8_t *dummy, \
                                    ptrdiff_t stride, int h) \
{ \
    int score = 0, x, y; \
    \
    for (y = 1; y < h; y++) { \
        for (x = 0; x < size; x += 4) { \
            score += SQ(s[x] - s[x + stride]) + \
                     SQ(s[x + 1] - s[x + stride + 1]) + \
                     SQ(s[x + 2] - s[x + stride + 2]) + \
                     SQ(s[x + 3] - s[x + stride + 3]); \
        } \
        s += stride; \
    } \
    \
    return score; \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
956 
/**
 * Vertical SSE of the difference signal s1 - s2: sum of squared changes
 * of the difference between consecutive rows, `size` pixels wide.
 */
#define VSSE(size) \
static int vsse ## size ## _c(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2, \
                              ptrdiff_t stride, int h) \
{ \
    int score = 0, x, y; \
    \
    for (y = 1; y < h; y++) { \
        for (x = 0; x < size; x++) \
            score += SQ(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]); \
        s1 += stride; \
        s2 += stride; \
    } \
    \
    return score; \
}
VSSE(8)
VSSE(16)
974 
/**
 * Builds a 16-pixel-wide comparison function from an 8x8 one by summing the
 * scores of the two (h == 8) or four (h == 16) 8x8 sub-blocks.
 */
#define WRAPPER8_16_SQ(name8, name16) \
static int name16(MpegEncContext *s, const uint8_t *dst, const uint8_t *src, \
                  ptrdiff_t stride, int h) \
{ \
    int score = 0; \
    \
    score += name8(s, dst, src, stride, 8); \
    score += name8(s, dst + 8, src + 8, stride, 8); \
    if (h == 16) { \
        dst += 8 * stride; \
        src += 8 * stride; \
        score += name8(s, dst, src, stride, 8); \
        score += name8(s, dst + 8, src + 8, stride, 8); \
    } \
    return score; \
}

WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#if CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
1002 
1004 {
1005  c->sum_abs_dctelem = sum_abs_dctelem_c;
1006 
1007  /* TODO [0] 16 [1] 8 */
1008  c->pix_abs[0][0] = pix_abs16_c;
1009  c->pix_abs[0][1] = pix_abs16_x2_c;
1010  c->pix_abs[0][2] = pix_abs16_y2_c;
1011  c->pix_abs[0][3] = pix_abs16_xy2_c;
1012  c->pix_abs[1][0] = pix_abs8_c;
1013  c->pix_abs[1][1] = pix_abs8_x2_c;
1014  c->pix_abs[1][2] = pix_abs8_y2_c;
1015  c->pix_abs[1][3] = pix_abs8_xy2_c;
1016 
1017 #define SET_CMP_FUNC(name) \
1018  c->name[0] = name ## 16_c; \
1019  c->name[1] = name ## 8x8_c;
1020 
1021  SET_CMP_FUNC(hadamard8_diff)
1022  c->hadamard8_diff[4] = hadamard8_intra16_c;
1023  c->hadamard8_diff[5] = hadamard8_intra8x8_c;
1024  SET_CMP_FUNC(dct_sad)
1025  SET_CMP_FUNC(dct_max)
1026 #if CONFIG_GPL
1027  SET_CMP_FUNC(dct264_sad)
1028 #endif
1029  c->sad[0] = pix_abs16_c;
1030  c->sad[1] = pix_abs8_c;
1031  c->sse[0] = sse16_c;
1032  c->sse[1] = sse8_c;
1033  c->sse[2] = sse4_c;
1034  SET_CMP_FUNC(quant_psnr)
1035  SET_CMP_FUNC(rd)
1036  SET_CMP_FUNC(bit)
1037  c->vsad[0] = vsad16_c;
1038  c->vsad[1] = vsad8_c;
1039  c->vsad[4] = vsad_intra16_c;
1040  c->vsad[5] = vsad_intra8_c;
1041  c->vsse[0] = vsse16_c;
1042  c->vsse[1] = vsse8_c;
1043  c->vsse[4] = vsse_intra16_c;
1044  c->vsse[5] = vsse_intra8_c;
1045  c->nsse[0] = nsse16_c;
1046  c->nsse[1] = nsse8_c;
1047 #if CONFIG_SNOW_DECODER || CONFIG_SNOW_ENCODER
1049 #endif
1050 
1051  c->median_sad[0] = pix_median_abs16_c;
1052  c->median_sad[1] = pix_median_abs8_c;
1053 
1054 #if ARCH_AARCH64
1055  ff_me_cmp_init_aarch64(c, avctx);
1056 #elif ARCH_ALPHA
1057  ff_me_cmp_init_alpha(c, avctx);
1058 #elif ARCH_ARM
1059  ff_me_cmp_init_arm(c, avctx);
1060 #elif ARCH_PPC
1061  ff_me_cmp_init_ppc(c, avctx);
1062 #elif ARCH_X86
1063  ff_me_cmp_init_x86(c, avctx);
1064 #elif ARCH_MIPS
1065  ff_me_cmp_init_mips(c, avctx);
1066 #endif
1067 
1068 }
sum_abs_dctelem_c
static int sum_abs_dctelem_c(const int16_t *block)
Definition: me_cmp.c:138
level
uint8_t level
Definition: svq3.c:204
pix_abs8_y2_c
static int pix_abs8_y2_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp.c:376
pix_median_abs8_c
static int pix_median_abs8_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp.c:325
nsse16_c
static int nsse16_c(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride, int h)
Definition: me_cmp.c:420
mem_internal.h
sse8_c
static int sse8_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp.c:87
pix_abs8_x2_c
static int pix_abs8_x2_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp.c:356
src1
const pixel * src1
Definition: h264pred_template.c:421
mpegvideoenc.h
VSSE_INTRA
#define VSSE_INTRA(size)
Definition: me_cmp.c:935
b
#define b
Definition: input.c:41
avg2
#define avg2(a, b)
Definition: me_cmp.c:147
copy_block8
static void copy_block8(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)
Definition: copy_block.h:47
ff_me_cmp_init_x86
void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx)
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
BUTTERFLYA
#define BUTTERFLYA(x, y)
Definition: me_cmp.c:554
ff_set_cmp
void ff_set_cmp(MECmpContext *c, me_cmp_func *cmp, int type)
Definition: me_cmp.c:476
pix_abs16_x2_c
static int pix_abs16_x2_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp.c:217
bit
#define bit(string, value)
Definition: cbs_mpeg2.c:58
FF_CMP_VSSE
#define FF_CMP_VSSE
Definition: avcodec.h:803
pix_abs8_xy2_c
static int pix_abs8_xy2_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp.c:398
dummy
int dummy
Definition: motion.c:65
sse4_c
static int sse4_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp.c:70
ff_me_cmp_init
av_cold void ff_me_cmp_init(MECmpContext *c, AVCodecContext *avctx)
Definition: me_cmp.c:1003
pix_abs16_c
static int pix_abs16_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp.c:150
type
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf type
Definition: writing_filters.txt:86
FF_CMP_SSE
#define FF_CMP_SSE
Definition: avcodec.h:795
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:180
av_cold
#define av_cold
Definition: attributes.h:90
FF_CMP_BIT
#define FF_CMP_BIT
Definition: avcodec.h:799
pix_abs16_y2_c
static int pix_abs16_y2_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp.c:245
s
#define s(width, name)
Definition: cbs_vp9.c:256
s1
#define s1
Definition: regdef.h:38
bits
uint8_t bits
Definition: vp3data.h:141
LOCAL_ALIGNED_16
#define LOCAL_ALIGNED_16(t, v,...)
Definition: mem_internal.h:131
pix_median_abs16_c
static int pix_median_abs16_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp.c:178
simple_idct.h
zero_cmp
static int zero_cmp(MpegEncContext *s, const uint8_t *a, const uint8_t *b, ptrdiff_t stride, int h)
Definition: me_cmp.c:470
pix_abs16_xy2_c
static int pix_abs16_xy2_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp.c:275
VSAD_INTRA
#define VSAD_INTRA(size)
Definition: me_cmp.c:893
cmp
static av_always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby, const int size, const int h, int ref_index, int src_index, me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags)
compares a block (either a full macroblock or a partition thereof) against a proposed motion-compensa...
Definition: motion_est.c:262
FFABS
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:64
MECmpContext
Definition: me_cmp.h:53
NULL
#define NULL
Definition: coverity.c:32
run
uint8_t run
Definition: svq3.c:203
VSSE
#define VSSE(size)
Definition: me_cmp.c:957
abs
#define abs(x)
Definition: cuda_runtime.h:35
FF_CMP_MEDIAN_SAD
#define FF_CMP_MEDIAN_SAD
Definition: avcodec.h:809
hadamard8_intra8x8_c
static int hadamard8_intra8x8_c(MpegEncContext *s, const uint8_t *src, const uint8_t *dummy, ptrdiff_t stride, int h)
Definition: me_cmp.c:606
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
ff_me_cmp_init_mips
void ff_me_cmp_init_mips(MECmpContext *c, AVCodecContext *avctx)
Definition: me_cmp_init_mips.c:25
s2
#define s2
Definition: regdef.h:39
FF_CMP_PSNR
#define FF_CMP_PSNR
Definition: avcodec.h:798
FF_CMP_W53
#define FF_CMP_W53
Definition: avcodec.h:805
pix_abs8_c
static int pix_abs8_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp.c:305
quant_psnr8x8_c
static int quant_psnr8x8_c(MpegEncContext *s, const uint8_t *src1, const uint8_t *src2, ptrdiff_t stride, int h)
Definition: me_cmp.c:735
ff_me_cmp_init_alpha
av_cold void ff_me_cmp_init_alpha(MECmpContext *c, AVCodecContext *avctx)
Definition: me_cmp_alpha.c:267
FF_CMP_SATD
#define FF_CMP_SATD
Definition: avcodec.h:796
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
FF_CMP_ZERO
#define FF_CMP_ZERO
Definition: avcodec.h:801
attributes.h
FF_CMP_SAD
#define FF_CMP_SAD
Definition: avcodec.h:794
BUTTERFLY1
#define BUTTERFLY1(x, y)
Definition: me_cmp.c:545
SRC
#define SRC(x, y)
Definition: h264pred_template.c:825
UNI_AC_ENC_INDEX
#define UNI_AC_ENC_INDEX(run, level)
Definition: mpegvideoenc.h:36
hadamard8_diff8x8_c
static int hadamard8_diff8x8_c(MpegEncContext *s, const uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h)
Definition: me_cmp.c:556
av_assert2
#define av_assert2(cond)
assert() equivalent, that does lie in speed critical code.
Definition: avassert.h:64
ff_me_cmp_init_aarch64
av_cold void ff_me_cmp_init_aarch64(MECmpContext *c, AVCodecContext *avctx)
Definition: me_cmp_init_aarch64.c:80
ff_simple_idct_int16_8bit
void ff_simple_idct_int16_8bit(int16_t *block)
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
copy_block.h
FF_CMP_RD
#define FF_CMP_RD
Definition: avcodec.h:800
internal.h
ff_square_tab
const uint32_t ff_square_tab[512]
Definition: me_cmp.c:35
src2
const pixel * src2
Definition: h264pred_template.c:422
FF_CMP_NSSE
#define FF_CMP_NSSE
Definition: avcodec.h:804
dct
static void dct(AudioRNNContext *s, float *out, const float *in)
Definition: af_arnndn.c:1010
dct_max8x8_c
static int dct_max8x8_c(MpegEncContext *s, const uint8_t *src1, const uint8_t *src2, ptrdiff_t stride, int h)
Definition: me_cmp.c:720
avcodec.h
stride
#define stride
Definition: h264pred_template.c:537
mid_pred
#define mid_pred
Definition: mathops.h:98
me_cmp_func
int(* me_cmp_func)(struct MpegEncContext *c, const uint8_t *blk1, const uint8_t *blk2, ptrdiff_t stride, int h)
Definition: me_cmp.h:48
me_cmp.h
AVCodecContext
main external API structure.
Definition: avcodec.h:398
WRAPPER8_16_SQ
#define WRAPPER8_16_SQ(name8, name16)
Definition: me_cmp.c:975
sse16_c
static int sse16_c(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp.c:108
FF_CMP_DCT
#define FF_CMP_DCT
Definition: avcodec.h:797
avg4
#define avg4(a, b, c, d)
Definition: me_cmp.c:148
temp
else temp
Definition: vf_mcdeint.c:248
dct_sad8x8_c
static int dct_sad8x8_c(MpegEncContext *s, const uint8_t *src1, const uint8_t *src2, ptrdiff_t stride, int h)
Definition: me_cmp.c:656
DCT8_1D
#define DCT8_1D(src, srcstride, dst, dststride)
Definition: h264dsp.c:95
ff_me_cmp_init_arm
av_cold void ff_me_cmp_init_arm(MECmpContext *c, AVCodecContext *avctx)
Definition: me_cmp_init_arm.c:41
DST
#define DST(x, y)
Definition: vp9dsp_template.c:781
rd8x8_c
static int rd8x8_c(MpegEncContext *s, const uint8_t *src1, const uint8_t *src2, ptrdiff_t stride, int h)
Definition: me_cmp.c:759
ff_me_cmp_init_ppc
av_cold void ff_me_cmp_init_ppc(MECmpContext *c, AVCodecContext *avctx)
Definition: me_cmp.c:726
ff_dsputil_init_dwt
void ff_dsputil_init_dwt(MECmpContext *c)
Definition: snow_dwt.c:842
FF_CMP_DCTMAX
#define FF_CMP_DCTMAX
Definition: avcodec.h:807
FF_CMP_DCT264
#define FF_CMP_DCT264
Definition: avcodec.h:808
FF_CMP_VSAD
#define FF_CMP_VSAD
Definition: avcodec.h:802
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
SET_CMP_FUNC
#define SET_CMP_FUNC(name)
nsse8_c
static int nsse8_c(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride, int h)
Definition: me_cmp.c:445
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
V
#define V(x)
h
h
Definition: vp9dsp_template.c:2038
VSAD
#define VSAD(size)
Definition: me_cmp.c:915
bit8x8_c
static int bit8x8_c(MpegEncContext *s, const uint8_t *src1, const uint8_t *src2, ptrdiff_t stride, int h)
Definition: me_cmp.c:834
BUTTERFLY2
#define BUTTERFLY2(o1, o2, i1, i2)
Definition: me_cmp.c:541
MpegEncContext
MpegEncContext.
Definition: mpegvideo.h:62
FF_CMP_W97
#define FF_CMP_W97
Definition: avcodec.h:806