FFmpeg
me_cmp_msa.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015 Parag Salasakar (Parag.Salasakar@imgtec.com)
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
22 #include "me_cmp_mips.h"
23 
24 static uint32_t sad_8width_msa(const uint8_t *src, int32_t src_stride,
25  const uint8_t *ref, int32_t ref_stride,
27 {
28  int32_t ht_cnt = height >> 2;
29  int res = (height & 0x03);
30  v16u8 src0, src1, src2, src3, ref0, ref1, ref2, ref3;
31  v8u16 zero = { 0 };
32  v8u16 sad = { 0 };
33 
34  for (; ht_cnt--; ) {
35  LD_UB4(src, src_stride, src0, src1, src2, src3);
36  src += (4 * src_stride);
37  LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3);
38  ref += (4 * ref_stride);
39 
40  PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2,
41  src0, src1, ref0, ref1);
42  sad += SAD_UB2_UH(src0, src1, ref0, ref1);
43  }
44  for (; res--; ) {
45  v16u8 diff;
46  src0 = LD_UB(src);
47  ref0 = LD_UB(ref);
48  src += src_stride;
49  ref += ref_stride;
50  diff = __msa_asub_u_b((v16u8) src0, (v16u8) ref0);
51  diff = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)diff);
52  sad += __msa_hadd_u_h((v16u8) diff, (v16u8) diff);
53  }
54 
55  return (HADD_UH_U32(sad));
56 }
57 
58 static uint32_t sad_16width_msa(const uint8_t *src, int32_t src_stride,
59  const uint8_t *ref, int32_t ref_stride,
61 {
62  int32_t ht_cnt = height >> 2;
63  int res = (height & 0x03);
64  v16u8 src0, src1, ref0, ref1;
65  v8u16 sad = { 0 };
66 
67  for (; ht_cnt--; ) {
68  LD_UB2(src, src_stride, src0, src1);
69  src += (2 * src_stride);
70  LD_UB2(ref, ref_stride, ref0, ref1);
71  ref += (2 * ref_stride);
72  sad += SAD_UB2_UH(src0, src1, ref0, ref1);
73 
74  LD_UB2(src, src_stride, src0, src1);
75  src += (2 * src_stride);
76  LD_UB2(ref, ref_stride, ref0, ref1);
77  ref += (2 * ref_stride);
78  sad += SAD_UB2_UH(src0, src1, ref0, ref1);
79  }
80  for (; res > 0; res--) {
81  v16u8 diff;
82  src0 = LD_UB(src);
83  ref0 = LD_UB(ref);
84  src += src_stride;
85  ref += ref_stride;
86  diff = __msa_asub_u_b((v16u8) src0, (v16u8) ref0);
87  sad += __msa_hadd_u_h((v16u8) diff, (v16u8) diff);
88  }
89  return (HADD_UH_U32(sad));
90 }
91 
92 static uint32_t sad_horiz_bilinear_filter_8width_msa(const uint8_t *src,
93  int32_t src_stride,
94  const uint8_t *ref,
95  int32_t ref_stride,
97 {
98  int32_t ht_cnt = height >> 3;
99  int32_t res = height & 0x07;
100  v16u8 src0, src1, src2, src3, comp0, comp1;
101  v16u8 ref0, ref1, ref2, ref3, ref4, ref5;
102  v8u16 zero = { 0 };
103  v8u16 sad = { 0 };
104 
105  for (; ht_cnt--; ) {
106  LD_UB4(src, src_stride, src0, src1, src2, src3);
107  src += (4 * src_stride);
108  LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3);
109  ref += (4 * ref_stride);
110 
111  PCKEV_D2_UB(src1, src0, src3, src2, src0, src1);
112  PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref4, ref5);
113  SLDI_B4_UB(ref0, ref0, ref1, ref1, ref2, ref2, ref3, ref3, 1,
114  ref0, ref1, ref2, ref3);
115  PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1);
116  AVER_UB2_UB(ref4, ref0, ref5, ref1, comp0, comp1);
117  sad += SAD_UB2_UH(src0, src1, comp0, comp1);
118 
119  LD_UB4(src, src_stride, src0, src1, src2, src3);
120  src += (4 * src_stride);
121  LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3);
122  ref += (4 * ref_stride);
123 
124  PCKEV_D2_UB(src1, src0, src3, src2, src0, src1);
125  PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref4, ref5);
126  SLDI_B4_UB(ref0, ref0, ref1, ref1, ref2, ref2, ref3, ref3, 1,
127  ref0, ref1, ref2, ref3);
128  PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1);
129  AVER_UB2_UB(ref4, ref0, ref5, ref1, comp0, comp1);
130  sad += SAD_UB2_UH(src0, src1, comp0, comp1);
131  }
132 
133  for (; res--; ) {
134  v16u8 diff;
135  src0 = LD_UB(src);
136  ref0 = LD_UB(ref);
137  ref1 = LD_UB(ref + 1);
138  src += src_stride;
139  ref += ref_stride;
140  comp0 = (v16u8)__msa_aver_u_b((v16u8) ref0, (v16u8) ref1);
141  diff = __msa_asub_u_b((v16u8) src0, (v16u8) comp0);
142  diff = (v16u8)__msa_ilvr_d((v2i64) zero, (v2i64) diff);
143  sad += __msa_hadd_u_h((v16u8) diff, (v16u8) diff);
144  }
145  return (HADD_UH_U32(sad));
146 }
147 
148 static uint32_t sad_horiz_bilinear_filter_16width_msa(const uint8_t *src,
149  int32_t src_stride,
150  const uint8_t *ref,
151  int32_t ref_stride,
152  int32_t height)
153 {
154  int32_t ht_cnt = height >> 3;
155  int32_t res = height & 0x07;
156  v16u8 src0, src1, src2, src3, comp0, comp1;
157  v16u8 ref00, ref10, ref20, ref30, ref01, ref11, ref21, ref31;
158  v8u16 sad = { 0 };
159 
160  for (; ht_cnt--; ) {
161  LD_UB4(src, src_stride, src0, src1, src2, src3);
162  src += (4 * src_stride);
163  LD_UB4(ref, ref_stride, ref00, ref10, ref20, ref30);
164  LD_UB4(ref + 1, ref_stride, ref01, ref11, ref21, ref31);
165  ref += (4 * ref_stride);
166 
167  AVER_UB2_UB(ref01, ref00, ref11, ref10, comp0, comp1);
168  sad += SAD_UB2_UH(src0, src1, comp0, comp1);
169  AVER_UB2_UB(ref21, ref20, ref31, ref30, comp0, comp1);
170  sad += SAD_UB2_UH(src2, src3, comp0, comp1);
171 
172  LD_UB4(src, src_stride, src0, src1, src2, src3);
173  src += (4 * src_stride);
174  LD_UB4(ref, ref_stride, ref00, ref10, ref20, ref30);
175  LD_UB4(ref + 1, ref_stride, ref01, ref11, ref21, ref31);
176  ref += (4 * ref_stride);
177 
178  AVER_UB2_UB(ref01, ref00, ref11, ref10, comp0, comp1);
179  sad += SAD_UB2_UH(src0, src1, comp0, comp1);
180  AVER_UB2_UB(ref21, ref20, ref31, ref30, comp0, comp1);
181  sad += SAD_UB2_UH(src2, src3, comp0, comp1);
182  }
183 
184  for (; res--; ) {
185  v16u8 diff;
186  src0 = LD_UB(src);
187  ref00 = LD_UB(ref);
188  ref01 = LD_UB(ref + 1);
189  src += src_stride;
190  ref += ref_stride;
191  comp0 = (v16u8)__msa_aver_u_b((v16u8) ref00, (v16u8) ref01);
192  diff = __msa_asub_u_b((v16u8) src0, (v16u8) comp0);
193  sad += __msa_hadd_u_h((v16u8) diff, (v16u8) diff);
194  }
195  return (HADD_UH_U32(sad));
196 }
197 
198 static uint32_t sad_vert_bilinear_filter_8width_msa(const uint8_t *src,
199  int32_t src_stride,
200  const uint8_t *ref,
201  int32_t ref_stride,
202  int32_t height)
203 {
204  int32_t ht_cnt = height >> 3;
205  int32_t res = height & 0x07;
206  v16u8 src0, src1, src2, src3, comp0, comp1;
207  v16u8 ref0, ref1, ref2, ref3, ref4;
208  v8u16 zero = { 0 };
209  v8u16 sad = { 0 };
210 
211  for (; ht_cnt--; ) {
212  LD_UB4(src, src_stride, src0, src1, src2, src3);
213  src += (4 * src_stride);
214  LD_UB5(ref, ref_stride, ref0, ref1, ref2, ref3, ref4);
215  ref += (4 * ref_stride);
216 
217  PCKEV_D2_UB(src1, src0, src3, src2, src0, src1);
218  PCKEV_D2_UB(ref1, ref0, ref2, ref1, ref0, ref1);
219  PCKEV_D2_UB(ref3, ref2, ref4, ref3, ref2, ref3);
220  AVER_UB2_UB(ref1, ref0, ref3, ref2, comp0, comp1);
221  sad += SAD_UB2_UH(src0, src1, comp0, comp1);
222 
223  LD_UB4(src, src_stride, src0, src1, src2, src3);
224  src += (4 * src_stride);
225  LD_UB5(ref, ref_stride, ref0, ref1, ref2, ref3, ref4);
226  ref += (4 * ref_stride);
227 
228  PCKEV_D2_UB(src1, src0, src3, src2, src0, src1);
229  PCKEV_D2_UB(ref1, ref0, ref2, ref1, ref0, ref1);
230  PCKEV_D2_UB(ref3, ref2, ref4, ref3, ref2, ref3);
231  AVER_UB2_UB(ref1, ref0, ref3, ref2, comp0, comp1);
232  sad += SAD_UB2_UH(src0, src1, comp0, comp1);
233  }
234 
235  for (; res--; ) {
236  v16u8 diff;
237  src0 = LD_UB(src);
238  LD_UB2(ref, ref_stride, ref0, ref1);
239  src += src_stride;
240  ref += ref_stride;
241  comp0 = (v16u8)__msa_aver_u_b((v16u8) ref0, (v16u8) ref1);
242  diff = __msa_asub_u_b((v16u8) src0, (v16u8) comp0);
243  diff = (v16u8)__msa_ilvr_d((v2i64) zero, (v2i64) diff);
244  sad += __msa_hadd_u_h((v16u8) diff, (v16u8) diff);
245  }
246  return (HADD_UH_U32(sad));
247 }
248 
249 static uint32_t sad_vert_bilinear_filter_16width_msa(const uint8_t *src,
250  int32_t src_stride,
251  const uint8_t *ref,
252  int32_t ref_stride,
253  int32_t height)
254 {
255  int32_t ht_cnt = height >> 3;
256  int32_t res = height & 0x07;
257  v16u8 src0, src1, src2, src3, comp0, comp1;
258  v16u8 ref0, ref1, ref2, ref3, ref4;
259  v8u16 sad = { 0 };
260 
261  for (; ht_cnt--; ) {
262  LD_UB5(ref, ref_stride, ref4, ref0, ref1, ref2, ref3);
263  ref += (5 * ref_stride);
264  LD_UB4(src, src_stride, src0, src1, src2, src3);
265  src += (4 * src_stride);
266 
267  AVER_UB2_UB(ref0, ref4, ref1, ref0, comp0, comp1);
268  sad += SAD_UB2_UH(src0, src1, comp0, comp1);
269  AVER_UB2_UB(ref2, ref1, ref3, ref2, comp0, comp1);
270  sad += SAD_UB2_UH(src2, src3, comp0, comp1);
271 
272  ref4 = ref3;
273 
274  LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3);
275  ref += (3 * ref_stride);
276  LD_UB4(src, src_stride, src0, src1, src2, src3);
277  src += (4 * src_stride);
278 
279  AVER_UB2_UB(ref0, ref4, ref1, ref0, comp0, comp1);
280  sad += SAD_UB2_UH(src0, src1, comp0, comp1);
281  AVER_UB2_UB(ref2, ref1, ref3, ref2, comp0, comp1);
282  sad += SAD_UB2_UH(src2, src3, comp0, comp1);
283  }
284 
285  for (; res--; ) {
286  v16u8 diff;
287  src0 = LD_UB(src);
288  LD_UB2(ref, ref_stride, ref0, ref1);
289  src += src_stride;
290  ref += ref_stride;
291  comp0 = (v16u8)__msa_aver_u_b((v16u8) ref0, (v16u8) ref1);
292  diff = __msa_asub_u_b((v16u8) src0, (v16u8) comp0);
293  sad += __msa_hadd_u_h((v16u8) diff, (v16u8) diff);
294  }
295  return (HADD_UH_U32(sad));
296 }
297 
298 static uint32_t sad_hv_bilinear_filter_8width_msa(const uint8_t *src,
299  int32_t src_stride,
300  const uint8_t *ref,
301  int32_t ref_stride,
302  int32_t height)
303 {
304  int32_t ht_cnt = height >> 2;
305  int32_t res = height & 0x03;
306  v16u8 src0, src1, src2, src3, temp0, temp1, diff;
307  v16u8 ref0, ref1, ref2, ref3, ref4;
308  v16i8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
309  v8u16 comp0, comp1, comp2, comp3;
310  v8u16 zero = { 0 };
311  v8u16 sad = { 0 };
312 
313  for (ht_cnt = (height >> 2); ht_cnt--;) {
314  LD_UB5(ref, ref_stride, ref4, ref0, ref1, ref2, ref3);
315  ref += (4 * ref_stride);
316  LD_UB4(src, src_stride, src0, src1, src2, src3);
317  src += (4 * src_stride);
318 
319  PCKEV_D2_UB(src1, src0, src3, src2, src0, src1);
320 
321  VSHF_B2_UB(ref4, ref4, ref0, ref0, mask, mask, temp0, temp1);
322  comp0 = __msa_hadd_u_h(temp0, temp0);
323  comp1 = __msa_hadd_u_h(temp1, temp1);
324  comp0 += comp1;
325  comp0 = (v8u16) __msa_srari_h((v8i16) comp0, 2);
326  comp0 = (v8u16) __msa_pckev_b((v16i8) comp0, (v16i8) comp0);
327 
328  temp0 = (v16u8) __msa_vshf_b(mask, (v16i8) ref1, (v16i8) ref1);
329  comp2 = __msa_hadd_u_h(temp0, temp0);
330  comp1 += comp2;
331  comp1 = (v8u16) __msa_srari_h((v8i16) comp1, 2);
332  comp1 = (v8u16) __msa_pckev_b((v16i8) comp1, (v16i8) comp1);
333  comp1 = (v8u16) __msa_pckev_d((v2i64) comp1, (v2i64) comp0);
334  diff = (v16u8) __msa_asub_u_b(src0, (v16u8) comp1);
335  sad += __msa_hadd_u_h(diff, diff);
336 
337  temp1 = (v16u8) __msa_vshf_b(mask, (v16i8) ref2, (v16i8) ref2);
338  comp3 = __msa_hadd_u_h(temp1, temp1);
339  comp2 += comp3;
340  comp2 = (v8u16) __msa_srari_h((v8i16) comp2, 2);
341  comp2 = (v8u16) __msa_pckev_b((v16i8) comp2, (v16i8) comp2);
342 
343  temp0 = (v16u8) __msa_vshf_b(mask, (v16i8) ref3, (v16i8) ref3);
344  comp0 = __msa_hadd_u_h(temp0, temp0);
345  comp3 += comp0;
346  comp3 = (v8u16) __msa_srari_h((v8i16) comp3, 2);
347  comp3 = (v8u16) __msa_pckev_b((v16i8) comp3, (v16i8) comp3);
348  comp3 = (v8u16) __msa_pckev_d((v2i64) comp3, (v2i64) comp2);
349  diff = (v16u8) __msa_asub_u_b(src1, (v16u8) comp3);
350  sad += __msa_hadd_u_h(diff, diff);
351  }
352 
353  for (; res--; ) {
354  src0 = LD_UB(src);
355  LD_UB2(ref, ref_stride, ref0, ref1);
356  temp0 = (v16u8) __msa_vshf_b(mask, (v16i8) ref0, (v16i8) ref0);
357  temp1 = (v16u8) __msa_vshf_b(mask, (v16i8) ref1, (v16i8) ref1);
358  src += src_stride;
359  ref += ref_stride;
360  comp0 = __msa_hadd_u_h(temp0, temp0);
361  comp2 = __msa_hadd_u_h(temp1, temp1);
362  comp2 += comp0;
363  comp2 = (v8u16)__msa_srari_h((v8i16) comp2, 2);
364  comp0 = (v16u8) __msa_pckev_b((v16i8) zero, (v16i8) comp2);
365  diff = __msa_asub_u_b(src0, comp0);
366  diff = (v16u8)__msa_ilvr_d((v2i64) zero, (v2i64) diff);
367  sad += __msa_hadd_u_h(diff, diff);
368  }
369  return (HADD_UH_U32(sad));
370 }
371 
372 static uint32_t sad_hv_bilinear_filter_16width_msa(const uint8_t *src,
373  int32_t src_stride,
374  const uint8_t *ref,
375  int32_t ref_stride,
376  int32_t height)
377 {
378  int32_t ht_cnt = height >> 3;
379  int32_t res = height & 0x07;
380  v16u8 src0, src1, src2, src3, comp, diff;
381  v16u8 temp0, temp1, temp2, temp3;
382  v16u8 ref00, ref01, ref02, ref03, ref04, ref10, ref11, ref12, ref13, ref14;
383  v8u16 comp0, comp1, comp2, comp3;
384  v8u16 sad = { 0 };
385 
386  for (; ht_cnt--; ) {
387  LD_UB4(src, src_stride, src0, src1, src2, src3);
388  src += (4 * src_stride);
389  LD_UB5(ref, ref_stride, ref04, ref00, ref01, ref02, ref03);
390  LD_UB5(ref + 1, ref_stride, ref14, ref10, ref11, ref12, ref13);
391  ref += (5 * ref_stride);
392 
393  ILVRL_B2_UB(ref14, ref04, temp0, temp1);
394  comp0 = __msa_hadd_u_h(temp0, temp0);
395  comp1 = __msa_hadd_u_h(temp1, temp1);
396  ILVRL_B2_UB(ref10, ref00, temp2, temp3);
397  comp2 = __msa_hadd_u_h(temp2, temp2);
398  comp3 = __msa_hadd_u_h(temp3, temp3);
399  comp0 += comp2;
400  comp1 += comp3;
401  SRARI_H2_UH(comp0, comp1, 2);
402  comp = (v16u8) __msa_pckev_b((v16i8) comp1, (v16i8) comp0);
403  diff = __msa_asub_u_b(src0, comp);
404  sad += __msa_hadd_u_h(diff, diff);
405 
406  ILVRL_B2_UB(ref11, ref01, temp0, temp1);
407  comp0 = __msa_hadd_u_h(temp0, temp0);
408  comp1 = __msa_hadd_u_h(temp1, temp1);
409  comp2 += comp0;
410  comp3 += comp1;
411  SRARI_H2_UH(comp2, comp3, 2);
412  comp = (v16u8) __msa_pckev_b((v16i8) comp3, (v16i8) comp2);
413  diff = __msa_asub_u_b(src1, comp);
414  sad += __msa_hadd_u_h(diff, diff);
415 
416  ILVRL_B2_UB(ref12, ref02, temp2, temp3);
417  comp2 = __msa_hadd_u_h(temp2, temp2);
418  comp3 = __msa_hadd_u_h(temp3, temp3);
419  comp0 += comp2;
420  comp1 += comp3;
421  SRARI_H2_UH(comp0, comp1, 2);
422  comp = (v16u8) __msa_pckev_b((v16i8) comp1, (v16i8) comp0);
423  diff = __msa_asub_u_b(src2, comp);
424  sad += __msa_hadd_u_h(diff, diff);
425 
426  ILVRL_B2_UB(ref13, ref03, temp0, temp1);
427  comp0 = __msa_hadd_u_h(temp0, temp0);
428  comp1 = __msa_hadd_u_h(temp1, temp1);
429  comp2 += comp0;
430  comp3 += comp1;
431  SRARI_H2_UH(comp2, comp3, 2);
432  comp = (v16u8) __msa_pckev_b((v16i8) comp3, (v16i8) comp2);
433  diff = __msa_asub_u_b(src3, comp);
434  sad += __msa_hadd_u_h(diff, diff);
435 
436  LD_UB4(src, src_stride, src0, src1, src2, src3);
437  src += (4 * src_stride);
438  LD_UB4(ref, ref_stride, ref00, ref01, ref02, ref03);
439  LD_UB4(ref + 1, ref_stride, ref10, ref11, ref12, ref13);
440  ref += (3 * ref_stride);
441 
442  ILVRL_B2_UB(ref10, ref00, temp2, temp3);
443  comp2 = __msa_hadd_u_h(temp2, temp2);
444  comp3 = __msa_hadd_u_h(temp3, temp3);
445  comp0 += comp2;
446  comp1 += comp3;
447  SRARI_H2_UH(comp0, comp1, 2);
448  comp = (v16u8) __msa_pckev_b((v16i8) comp1, (v16i8) comp0);
449  diff = __msa_asub_u_b(src0, comp);
450  sad += __msa_hadd_u_h(diff, diff);
451 
452  ILVRL_B2_UB(ref11, ref01, temp0, temp1);
453  comp0 = __msa_hadd_u_h(temp0, temp0);
454  comp1 = __msa_hadd_u_h(temp1, temp1);
455  comp2 += comp0;
456  comp3 += comp1;
457  SRARI_H2_UH(comp2, comp3, 2);
458  comp = (v16u8) __msa_pckev_b((v16i8) comp3, (v16i8) comp2);
459  diff = __msa_asub_u_b(src1, comp);
460  sad += __msa_hadd_u_h(diff, diff);
461 
462  ILVRL_B2_UB(ref12, ref02, temp2, temp3);
463  comp2 = __msa_hadd_u_h(temp2, temp2);
464  comp3 = __msa_hadd_u_h(temp3, temp3);
465  comp0 += comp2;
466  comp1 += comp3;
467  SRARI_H2_UH(comp0, comp1, 2);
468  comp = (v16u8) __msa_pckev_b((v16i8) comp1, (v16i8) comp0);
469  diff = __msa_asub_u_b(src2, comp);
470  sad += __msa_hadd_u_h(diff, diff);
471 
472  ILVRL_B2_UB(ref13, ref03, temp0, temp1);
473  comp0 = __msa_hadd_u_h(temp0, temp0);
474  comp1 = __msa_hadd_u_h(temp1, temp1);
475  comp2 += comp0;
476  comp3 += comp1;
477  SRARI_H2_UH(comp2, comp3, 2);
478  comp = (v16u8) __msa_pckev_b((v16i8) comp3, (v16i8) comp2);
479  diff = __msa_asub_u_b(src3, comp);
480  sad += __msa_hadd_u_h(diff, diff);
481  }
482  for (; res--; ) {
483  src0 = LD_UB(src);
484  LD_UB2(ref, ref_stride, ref00, ref10);
485  LD_UB2(ref + 1, ref_stride, ref01, ref11);
486  src += src_stride;
487  ref += ref_stride;
488  ILVRL_B2_UB(ref10, ref00, temp0, temp1);
489  ILVRL_B2_UB(ref11, ref01, temp2, temp3);
490  comp0 = __msa_hadd_u_h(temp0, temp0);
491  comp1 = __msa_hadd_u_h(temp1, temp1);
492  comp2 = __msa_hadd_u_h(temp2, temp2);
493  comp3 = __msa_hadd_u_h(temp3, temp3);
494  comp2 += comp0;
495  comp3 += comp1;
496  SRARI_H2_UH(comp2, comp3, 2);
497  comp = (v16u8) __msa_pckev_b((v16i8) comp3, (v16i8) comp2);
498  diff = __msa_asub_u_b(src0, comp);
499  sad += __msa_hadd_u_h(diff, diff);
500  }
501 
502  return (HADD_UH_U32(sad));
503 }
504 
/*
 * Accumulate into `var` the squared byte differences between the vectors
 * `src` and `ref`: the bytes are interleaved, subtracted pairwise into
 * halfwords, and each halfword is multiplied by itself and added to `var`.
 * `var` is both read and written.
 */
#define CALC_MSE_B(src, ref, var)                                        \
{                                                                        \
    v8i16 diff_r_m, diff_l_m;                                            \
    v16u8 pair_r_m, pair_l_m;                                            \
                                                                         \
    ILVRL_B2_UB(src, ref, pair_r_m, pair_l_m);                           \
    HSUB_UB2_SH(pair_r_m, pair_l_m, diff_r_m, diff_l_m);                 \
    DPADD_SH2_SW(diff_r_m, diff_l_m, diff_r_m, diff_l_m, var, var);      \
}
514 
515 static uint32_t sse_4width_msa(const uint8_t *src_ptr, int32_t src_stride,
516  const uint8_t *ref_ptr, int32_t ref_stride,
517  int32_t height)
518 {
519  int32_t ht_cnt = height >> 2;
520  int32_t res = height & 0x03;
521  uint32_t sse;
522  uint32_t src0, src1, src2, src3;
523  uint32_t ref0, ref1, ref2, ref3;
524  v16u8 src = { 0 };
525  v16u8 ref = { 0 };
526  v16u8 zero = { 0 };
527  v4i32 var = { 0 };
528 
529  for (; ht_cnt--; ) {
530  LW4(src_ptr, src_stride, src0, src1, src2, src3);
531  src_ptr += (4 * src_stride);
532  LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
533  ref_ptr += (4 * ref_stride);
534 
535  INSERT_W4_UB(src0, src1, src2, src3, src);
536  INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
537  CALC_MSE_B(src, ref, var);
538  }
539 
540  for (; res--; ) {
541  v16u8 reg0;
542  v8i16 tmp0;
543  src0 = LW(src_ptr);
544  ref0 = LW(ref_ptr);
545  src_ptr += src_stride;
546  ref_ptr += ref_stride;
547  src = (v16u8)__msa_insert_w((v4i32) src, 0, src0);
548  ref = (v16u8)__msa_insert_w((v4i32) ref, 0, ref0);
549  reg0 = (v16u8)__msa_ilvr_b(src, ref);
550  reg0 = (v16u8)__msa_ilvr_d((v2i64) zero, (v2i64) reg0);
551  tmp0 = (v8i16)__msa_hsub_u_h((v16u8) reg0, (v16u8) reg0);
552  var = (v4i32)__msa_dpadd_s_w((v4i32) var, (v8i16) tmp0, (v8i16) tmp0);
553  }
554  sse = HADD_SW_S32(var);
555 
556  return sse;
557 }
558 
559 static uint32_t sse_8width_msa(const uint8_t *src_ptr, int32_t src_stride,
560  const uint8_t *ref_ptr, int32_t ref_stride,
561  int32_t height)
562 {
563  int32_t ht_cnt = height >> 2;
564  int32_t res = height & 0x03;
565  uint32_t sse;
566  v16u8 src0, src1, src2, src3;
567  v16u8 ref0, ref1, ref2, ref3;
568  v4i32 var = { 0 };
569 
570  for (; ht_cnt--; ) {
571  LD_UB4(src_ptr, src_stride, src0, src1, src2, src3);
572  src_ptr += (4 * src_stride);
573  LD_UB4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
574  ref_ptr += (4 * ref_stride);
575 
576  PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2,
577  src0, src1, ref0, ref1);
578  CALC_MSE_B(src0, ref0, var);
579  CALC_MSE_B(src1, ref1, var);
580  }
581 
582  for (; res--; ) {
583  v8i16 tmp0;
584  src0 = LD_UB(src_ptr);
585  ref0 = LD_UB(ref_ptr);
586  src_ptr += src_stride;
587  ref_ptr += ref_stride;
588  ref1 = (v16u8)__msa_ilvr_b(src0, ref0);
589  tmp0 = (v8i16)__msa_hsub_u_h((v16u8) ref1, (v16u8) ref1);
590  var = (v4i32)__msa_dpadd_s_w((v4i32) var, (v8i16) tmp0, (v8i16) tmp0);
591  }
592  sse = HADD_SW_S32(var);
593 
594  return sse;
595 }
596 
597 static uint32_t sse_16width_msa(const uint8_t *src_ptr, int32_t src_stride,
598  const uint8_t *ref_ptr, int32_t ref_stride,
599  int32_t height)
600 {
601  int32_t ht_cnt = height >> 2;
602  int32_t res = height & 0x03;
603  uint32_t sse;
604  v16u8 src, ref;
605  v4i32 var = { 0 };
606 
607  for (; ht_cnt--; ) {
608  src = LD_UB(src_ptr);
609  src_ptr += src_stride;
610  ref = LD_UB(ref_ptr);
611  ref_ptr += ref_stride;
612  CALC_MSE_B(src, ref, var);
613 
614  src = LD_UB(src_ptr);
615  src_ptr += src_stride;
616  ref = LD_UB(ref_ptr);
617  ref_ptr += ref_stride;
618  CALC_MSE_B(src, ref, var);
619 
620  src = LD_UB(src_ptr);
621  src_ptr += src_stride;
622  ref = LD_UB(ref_ptr);
623  ref_ptr += ref_stride;
624  CALC_MSE_B(src, ref, var);
625 
626  src = LD_UB(src_ptr);
627  src_ptr += src_stride;
628  ref = LD_UB(ref_ptr);
629  ref_ptr += ref_stride;
630  CALC_MSE_B(src, ref, var);
631  }
632 
633  for (; res--; ) {
634  src = LD_UB(src_ptr);
635  src_ptr += src_stride;
636  ref = LD_UB(ref_ptr);
637  ref_ptr += ref_stride;
638  CALC_MSE_B(src, ref, var);
639  }
640 
641  sse = HADD_SW_S32(var);
642 
643  return sse;
644 }
645 
646 static int32_t hadamard_diff_8x8_msa(const uint8_t *src, int32_t src_stride,
647  const uint8_t *ref, int32_t ref_stride)
648 {
649  v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
650  v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
651  v8u16 diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7;
652  v8u16 temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
653  v8i16 sum = { 0 };
654  v8i16 zero = { 0 };
655 
656  LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
657  LD_UB8(ref, ref_stride, ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7);
658  ILVR_B8_UH(src0, ref0, src1, ref1, src2, ref2, src3, ref3,
659  src4, ref4, src5, ref5, src6, ref6, src7, ref7,
660  diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7);
661  HSUB_UB4_UH(diff0, diff1, diff2, diff3, diff0, diff1, diff2, diff3);
662  HSUB_UB4_UH(diff4, diff5, diff6, diff7, diff4, diff5, diff6, diff7);
663  TRANSPOSE8x8_UH_UH(diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7,
664  diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7);
665  BUTTERFLY_8(diff0, diff2, diff4, diff6, diff7, diff5, diff3, diff1,
666  temp0, temp2, temp4, temp6, temp7, temp5, temp3, temp1);
667  BUTTERFLY_8(temp0, temp1, temp4, temp5, temp7, temp6, temp3, temp2,
668  diff0, diff1, diff4, diff5, diff7, diff6, diff3, diff2);
669  BUTTERFLY_8(diff0, diff1, diff2, diff3, diff7, diff6, diff5, diff4,
670  temp0, temp1, temp2, temp3, temp7, temp6, temp5, temp4);
671  TRANSPOSE8x8_UH_UH(temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7,
672  temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7);
673  BUTTERFLY_8(temp0, temp2, temp4, temp6, temp7, temp5, temp3, temp1,
674  diff0, diff2, diff4, diff6, diff7, diff5, diff3, diff1);
675  BUTTERFLY_8(diff0, diff1, diff4, diff5, diff7, diff6, diff3, diff2,
676  temp0, temp1, temp4, temp5, temp7, temp6, temp3, temp2);
677  ADD4(temp0, temp4, temp1, temp5, temp2, temp6, temp3, temp7,
678  diff0, diff1, diff2, diff3);
679  sum = __msa_asub_s_h((v8i16) temp3, (v8i16) temp7);
680  sum += __msa_asub_s_h((v8i16) temp2, (v8i16) temp6);
681  sum += __msa_asub_s_h((v8i16) temp1, (v8i16) temp5);
682  sum += __msa_asub_s_h((v8i16) temp0, (v8i16) temp4);
683  sum += __msa_add_a_h((v8i16) diff0, zero);
684  sum += __msa_add_a_h((v8i16) diff1, zero);
685  sum += __msa_add_a_h((v8i16) diff2, zero);
686  sum += __msa_add_a_h((v8i16) diff3, zero);
687 
688  return (HADD_UH_U32(sum));
689 }
690 
691 static int32_t hadamard_intra_8x8_msa(const uint8_t *src, int32_t src_stride,
692  const uint8_t *dumy, int32_t ref_stride)
693 {
694  int32_t sum_res = 0;
695  v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
696  v8u16 diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7;
697  v8u16 temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
698  v8i16 sum = { 0 };
699  v16i8 zero = { 0 };
700 
701  LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
702  TRANSPOSE8x8_UB_UB(src0, src1, src2, src3, src4, src5, src6, src7,
703  src0, src1, src2, src3, src4, src5, src6, src7);
704  ILVR_B8_UH(zero, src0, zero, src1, zero, src2, zero, src3,
705  zero, src4, zero, src5, zero, src6, zero, src7,
706  diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7);
707  BUTTERFLY_8(diff0, diff2, diff4, diff6, diff7, diff5, diff3, diff1,
708  temp0, temp2, temp4, temp6, temp7, temp5, temp3, temp1);
709  BUTTERFLY_8(temp0, temp1, temp4, temp5, temp7, temp6, temp3, temp2,
710  diff0, diff1, diff4, diff5, diff7, diff6, diff3, diff2);
711  BUTTERFLY_8(diff0, diff1, diff2, diff3, diff7, diff6, diff5, diff4,
712  temp0, temp1, temp2, temp3, temp7, temp6, temp5, temp4);
713  TRANSPOSE8x8_UH_UH(temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7,
714  temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7);
715  BUTTERFLY_8(temp0, temp2, temp4, temp6, temp7, temp5, temp3, temp1,
716  diff0, diff2, diff4, diff6, diff7, diff5, diff3, diff1);
717  BUTTERFLY_8(diff0, diff1, diff4, diff5, diff7, diff6, diff3, diff2,
718  temp0, temp1, temp4, temp5, temp7, temp6, temp3, temp2);
719  ADD4(temp0, temp4, temp1, temp5, temp2, temp6, temp3, temp7,
720  diff0, diff1, diff2, diff3);
721  sum = __msa_asub_s_h((v8i16) temp3, (v8i16) temp7);
722  sum += __msa_asub_s_h((v8i16) temp2, (v8i16) temp6);
723  sum += __msa_asub_s_h((v8i16) temp1, (v8i16) temp5);
724  sum += __msa_asub_s_h((v8i16) temp0, (v8i16) temp4);
725  sum += __msa_add_a_h((v8i16) diff0, (v8i16) zero);
726  sum += __msa_add_a_h((v8i16) diff1, (v8i16) zero);
727  sum += __msa_add_a_h((v8i16) diff2, (v8i16) zero);
728  sum += __msa_add_a_h((v8i16) diff3, (v8i16) zero);
729  sum_res = (HADD_UH_U32(sum));
730  sum_res -= abs(temp0[0] + temp4[0]);
731 
732  return sum_res;
733 }
734 
735 int ff_pix_abs16_msa(MpegEncContext *v, const uint8_t *src, const uint8_t *ref,
736  ptrdiff_t stride, int height)
737 {
739 }
740 
741 int ff_pix_abs8_msa(MpegEncContext *v, const uint8_t *src, const uint8_t *ref,
742  ptrdiff_t stride, int height)
743 {
745 }
746 
747 int ff_pix_abs16_x2_msa(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
748  ptrdiff_t stride, int h)
749 {
751 }
752 
753 int ff_pix_abs16_y2_msa(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
754  ptrdiff_t stride, int h)
755 {
756  return sad_vert_bilinear_filter_16width_msa(pix1, stride, pix2, stride, h);
757 }
758 
759 int ff_pix_abs16_xy2_msa(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
760  ptrdiff_t stride, int h)
761 {
762  return sad_hv_bilinear_filter_16width_msa(pix1, stride, pix2, stride, h);
763 }
764 
765 int ff_pix_abs8_x2_msa(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
766  ptrdiff_t stride, int h)
767 {
768  return sad_horiz_bilinear_filter_8width_msa(pix1, stride, pix2, stride, h);
769 }
770 
771 int ff_pix_abs8_y2_msa(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
772  ptrdiff_t stride, int h)
773 {
774  return sad_vert_bilinear_filter_8width_msa(pix1, stride, pix2, stride, h);
775 }
776 
777 int ff_pix_abs8_xy2_msa(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
778  ptrdiff_t stride, int h)
779 {
780  return sad_hv_bilinear_filter_8width_msa(pix1, stride, pix2, stride, h);
781 }
782 
783 int ff_sse16_msa(MpegEncContext *v, const uint8_t *src, const uint8_t *ref,
784  ptrdiff_t stride, int height)
785 {
787 }
788 
789 int ff_sse8_msa(MpegEncContext *v, const uint8_t *src, const uint8_t *ref,
790  ptrdiff_t stride, int height)
791 {
793 }
794 
795 int ff_sse4_msa(MpegEncContext *v, const uint8_t *src, const uint8_t *ref,
796  ptrdiff_t stride, int height)
797 {
799 }
800 
801 int ff_hadamard8_diff8x8_msa(MpegEncContext *s, const uint8_t *dst, const uint8_t *src,
802  ptrdiff_t stride, int h)
803 {
804  return hadamard_diff_8x8_msa(src, stride, dst, stride);
805 }
806 
807 int ff_hadamard8_intra8x8_msa(MpegEncContext *s, const uint8_t *src, const uint8_t *dummy,
808  ptrdiff_t stride, int h)
809 {
811 }
812 
/* Hadamard Transform functions */
/*
 * Define a function `name16` that scores a 16-wide area by calling the
 * 8x8 function `name8` on the left and right 8x8 blocks, and, when h is
 * exactly 16, also on the two blocks eight rows further down. The scores
 * are added together.
 */
#define WRAPPER8_16_SQ(name8, name16)                                   \
int name16(MpegEncContext *s, const uint8_t *dst, const uint8_t *src,  \
           ptrdiff_t stride, int h)                                     \
{                                                                       \
    int total = name8(s, dst, src, stride, 8);                          \
                                                                        \
    total += name8(s, dst + 8, src + 8, stride, 8);                     \
    if (h != 16)                                                        \
        return total;                                                   \
    dst += 8 * stride;                                                  \
    src += 8 * stride;                                                  \
    total += name8(s, dst, src, stride, 8);                             \
    total += name8(s, dst + 8, src + 8, stride, 8);                     \
    return total;                                                       \
}
829 
LD_UB8
#define LD_UB8(...)
Definition: generic_macros_msa.h:335
comp
static void comp(unsigned char *dst, ptrdiff_t dst_stride, unsigned char *src, ptrdiff_t src_stride, int add)
Definition: eamad.c:80
PCKEV_D2_UB
#define PCKEV_D2_UB(...)
Definition: generic_macros_msa.h:1787
src1
const pixel * src1
Definition: h264pred_template.c:421
VSHF_B2_UB
#define VSHF_B2_UB(...)
Definition: generic_macros_msa.h:661
AVER_UB2_UB
#define AVER_UB2_UB(...)
Definition: generic_macros_msa.h:595
ff_pix_abs8_y2_msa
int ff_pix_abs8_y2_msa(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp_msa.c:771
sad_hv_bilinear_filter_16width_msa
static uint32_t sad_hv_bilinear_filter_16width_msa(const uint8_t *src, int32_t src_stride, const uint8_t *ref, int32_t ref_stride, int32_t height)
Definition: me_cmp_msa.c:372
ff_sse16_msa
int ff_sse16_msa(MpegEncContext *v, const uint8_t *src, const uint8_t *ref, ptrdiff_t stride, int height)
Definition: me_cmp_msa.c:783
LD_UB5
#define LD_UB5(...)
Definition: generic_macros_msa.h:307
sad_vert_bilinear_filter_16width_msa
static uint32_t sad_vert_bilinear_filter_16width_msa(const uint8_t *src, int32_t src_stride, const uint8_t *ref, int32_t ref_stride, int32_t height)
Definition: me_cmp_msa.c:249
ff_pix_abs16_x2_msa
int ff_pix_abs16_x2_msa(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp_msa.c:747
ADD4
#define ADD4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3)
Definition: generic_macros_msa.h:2123
PCKEV_D4_UB
#define PCKEV_D4_UB(...)
Definition: generic_macros_msa.h:1797
HADD_UH_U32
#define HADD_UH_U32(in)
Definition: generic_macros_msa.h:1017
ff_pix_abs16_msa
int ff_pix_abs16_msa(MpegEncContext *v, const uint8_t *src, const uint8_t *ref, ptrdiff_t stride, int height)
Definition: me_cmp_msa.c:735
generic_macros_msa.h
dummy
int dummy
Definition: motion.c:65
sad_horiz_bilinear_filter_8width_msa
static uint32_t sad_horiz_bilinear_filter_8width_msa(const uint8_t *src, int32_t src_stride, const uint8_t *ref, int32_t ref_stride, int32_t height)
Definition: me_cmp_msa.c:92
LD_UB
#define LD_UB(...)
Definition: generic_macros_msa.h:32
ff_pix_abs16_xy2_msa
int ff_pix_abs16_xy2_msa(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp_msa.c:759
me_cmp_mips.h
ff_pix_abs8_x2_msa
int ff_pix_abs8_x2_msa(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp_msa.c:765
ILVR_B8_UH
#define ILVR_B8_UH(...)
Definition: generic_macros_msa.h:1374
mask
static const uint16_t mask[17]
Definition: lzw.c:38
s
#define s(width, name)
Definition: cbs_vp9.c:256
ff_sse8_msa
int ff_sse8_msa(MpegEncContext *v, const uint8_t *src, const uint8_t *ref, ptrdiff_t stride, int height)
Definition: me_cmp_msa.c:789
SLDI_B4_UB
#define SLDI_B4_UB(...)
Definition: generic_macros_msa.h:643
sse_4width_msa
static uint32_t sse_4width_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *ref_ptr, int32_t ref_stride, int32_t height)
Definition: me_cmp_msa.c:515
HADD_SW_S32
#define HADD_SW_S32(in)
Definition: generic_macros_msa.h:998
ff_hadamard8_diff8x8_msa
int ff_hadamard8_diff8x8_msa(MpegEncContext *s, const uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h)
Definition: me_cmp_msa.c:801
WRAPPER8_16_SQ
#define WRAPPER8_16_SQ(name8, name16)
Definition: me_cmp_msa.c:814
ILVRL_B2_UB
#define ILVRL_B2_UB(...)
Definition: generic_macros_msa.h:1495
hadamard_diff_8x8_msa
static int32_t hadamard_diff_8x8_msa(const uint8_t *src, int32_t src_stride, const uint8_t *ref, int32_t ref_stride)
Definition: me_cmp_msa.c:646
abs
#define abs(x)
Definition: cuda_runtime.h:35
TRANSPOSE8x8_UB_UB
#define TRANSPOSE8x8_UB_UB(...)
Definition: generic_macros_msa.h:2375
ff_pix_abs8_xy2_msa
int ff_pix_abs8_xy2_msa(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp_msa.c:777
ff_hadamard8_intra16_msa
int ff_hadamard8_intra16_msa(MpegEncContext *s, const uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h)
LW
#define LW(psrc)
Definition: generic_macros_msa.h:104
ff_hadamard8_intra8x8_msa
int ff_hadamard8_intra8x8_msa(MpegEncContext *s, const uint8_t *src, const uint8_t *dummy, ptrdiff_t stride, int h)
Definition: me_cmp_msa.c:807
CALC_MSE_B
#define CALC_MSE_B(src, ref, var)
Definition: me_cmp_msa.c:505
ff_hadamard8_diff16_msa
int ff_hadamard8_diff16_msa(MpegEncContext *s, const uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h)
LD_UB4
#define LD_UB4(...)
Definition: generic_macros_msa.h:296
ff_sse4_msa
int ff_sse4_msa(MpegEncContext *v, const uint8_t *src, const uint8_t *ref, ptrdiff_t stride, int height)
Definition: me_cmp_msa.c:795
sad_16width_msa
static uint32_t sad_16width_msa(const uint8_t *src, int32_t src_stride, const uint8_t *ref, int32_t ref_stride, int32_t height)
Definition: me_cmp_msa.c:58
height
#define height
LW4
#define LW4(psrc, stride, out0, out1, out2, out3)
Definition: generic_macros_msa.h:202
ff_pix_abs16_y2_msa
int ff_pix_abs16_y2_msa(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
Definition: me_cmp_msa.c:753
hadamard_intra_8x8_msa
static int32_t hadamard_intra_8x8_msa(const uint8_t *src, int32_t src_stride, const uint8_t *dumy, int32_t ref_stride)
Definition: me_cmp_msa.c:691
SRARI_H2_UH
#define SRARI_H2_UH(...)
Definition: generic_macros_msa.h:2058
BUTTERFLY_8
#define BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3, out4, out5, out6, out7)
Definition: generic_macros_msa.h:2263
src2
const pixel * src2
Definition: h264pred_template.c:422
INSERT_W4_UB
#define INSERT_W4_UB(...)
Definition: generic_macros_msa.h:1153
stride
#define stride
Definition: h264pred_template.c:537
LD_UB2
#define LD_UB2(...)
Definition: generic_macros_msa.h:277
HSUB_UB4_UH
#define HSUB_UB4_UH(...)
Definition: generic_macros_msa.h:1106
sse_8width_msa
static uint32_t sse_8width_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *ref_ptr, int32_t ref_stride, int32_t height)
Definition: me_cmp_msa.c:559
sad_vert_bilinear_filter_8width_msa
static uint32_t sad_vert_bilinear_filter_8width_msa(const uint8_t *src, int32_t src_stride, const uint8_t *ref, int32_t ref_stride, int32_t height)
Definition: me_cmp_msa.c:198
ref
static int ref[MAX_W *MAX_W]
Definition: jpeg2000dwt.c:112
sse_16width_msa
static uint32_t sse_16width_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *ref_ptr, int32_t ref_stride, int32_t height)
Definition: me_cmp_msa.c:597
src0
const pixel *const src0
Definition: h264pred_template.c:420
zero
#define zero
Definition: regdef.h:64
diff
static av_always_inline int diff(const uint32_t a, const uint32_t b)
Definition: vf_palettegen.c:139
sad_hv_bilinear_filter_8width_msa
static uint32_t sad_hv_bilinear_filter_8width_msa(const uint8_t *src, int32_t src_stride, const uint8_t *ref, int32_t ref_stride, int32_t height)
Definition: me_cmp_msa.c:298
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
int32_t
int32_t
Definition: audioconvert.c:56
sad_horiz_bilinear_filter_16width_msa
static uint32_t sad_horiz_bilinear_filter_16width_msa(const uint8_t *src, int32_t src_stride, const uint8_t *ref, int32_t ref_stride, int32_t height)
Definition: me_cmp_msa.c:148
sse
static int sse(MpegEncContext *s, const uint8_t *src1, const uint8_t *src2, int w, int h, int stride)
Definition: mpegvideo_enc.c:2617
h
h
Definition: vp9dsp_template.c:2038
SAD_UB2_UH
#define SAD_UB2_UH(in0, in1, ref0, ref1)
Definition: generic_macros_msa.h:1118
MpegEncContext
MpegEncContext.
Definition: mpegvideo.h:70
sad_8width_msa
static uint32_t sad_8width_msa(const uint8_t *src, int32_t src_stride, const uint8_t *ref, int32_t ref_stride, int32_t height)
Definition: me_cmp_msa.c:24
ff_pix_abs8_msa
int ff_pix_abs8_msa(MpegEncContext *v, const uint8_t *src, const uint8_t *ref, ptrdiff_t stride, int height)
Definition: me_cmp_msa.c:741
TRANSPOSE8x8_UH_UH
#define TRANSPOSE8x8_UH_UH(...)
Definition: generic_macros_msa.h:2504