h264qpel_mmi.c
1 /*
2  * Loongson SIMD optimized h264qpel
3  *
4  * Copyright (c) 2015 Loongson Technology Corporation Limited
5  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "h264dsp_mips.h"
26 
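/*
 * copy_block4/8/16_mmi copy a block of 4, 8 or 16 bytes per row for h rows.
 * The Loongson gslwlc1/gslwrc1 (32-bit) and gsldlc1/gsldrc1 (64-bit)
 * left/right pairs load and store through the FP/MMI registers without any
 * alignment requirement on src or dst.
 */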
27 static inline void copy_block4_mmi(uint8_t *dst, const uint8_t *src,
28  int dstStride, int srcStride, int h)
29 {
30  __asm__ volatile (
31  "1: \r\n"
32  "gslwlc1 $f2, 3(%[src]) \r\n"
33  "gslwrc1 $f2, 0(%[src]) \r\n"
34  "gsswlc1 $f2, 3(%[dst]) \r\n"
35  "gsswrc1 $f2, 0(%[dst]) \r\n"
36  "dadd %[src], %[src], %[srcStride] \r\n"
37  "dadd %[dst], %[dst], %[dstStride] \r\n"
38  "daddi %[h], %[h], -1 \r\n"
39  "bnez %[h], 1b \r\n"
40  : [dst]"+&r"(dst),[src]"+&r"(src)
41  : [dstStride]"r"(dstStride),[srcStride]"r"(srcStride),[h]"r"(h)
42  : "$f2"
43  );
44 }
45 
46 static inline void copy_block8_mmi(uint8_t *dst, const uint8_t *src,
47  int dstStride, int srcStride, int h)
48 {
49  __asm__ volatile (
50  "1: \r\n"
51  "gsldlc1 $f2, 7(%[src]) \r\n"
52  "gsldrc1 $f2, 0(%[src]) \r\n"
53  "gssdlc1 $f2, 7(%[dst]) \r\n"
54  "gssdrc1 $f2, 0(%[dst]) \r\n"
55  "dadd %[src], %[src], %[srcStride] \r\n"
56  "dadd %[dst], %[dst], %[dstStride] \r\n"
57  "daddi %[h], %[h], -1 \r\n"
58  "bnez %[h], 1b \r\n"
59  : [dst]"+&r"(dst),[src]"+&r"(src)
60  : [dstStride]"r"(dstStride),[srcStride]"r"(srcStride),[h]"r"(h)
61  : "$f2"
62  );
63 }
64 
65 static inline void copy_block16_mmi(uint8_t *dst, const uint8_t *src,
66  int dstStride, int srcStride, int h)
67 {
68  __asm__ volatile (
69  "1: \r\n"
70  "gsldlc1 $f2, 7(%[src]) \r\n"
71  "gsldrc1 $f2, 0(%[src]) \r\n"
72  "gsldlc1 $f4, 15(%[src]) \r\n"
73  "gsldrc1 $f4, 8(%[src]) \r\n"
74  "gssdlc1 $f2, 7(%[dst]) \r\n"
75  "gssdrc1 $f2, 0(%[dst]) \r\n"
76  "gssdlc1 $f4, 15(%[dst]) \r\n"
77  "gssdrc1 $f4, 8(%[dst]) \r\n"
78  "dadd %[src], %[src], %[srcStride] \r\n"
79  "dadd %[dst], %[dst], %[dstStride] \r\n"
80  "daddi %[h], %[h], -1 \r\n"
81  "bnez %[h], 1b \r\n"
82  : [dst]"+&r"(dst),[src]"+&r"(src)
83  : [dstStride]"r"(dstStride),[srcStride]"r"(srcStride),[h]"r"(h)
84  : "$f2","$f4"
85  );
86 }
87 
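/*
 * put_pixelsN_mmi copy an N pixel wide block of h rows; avg_pixelsN_mmi
 * average the source into the existing destination with pavgb, the packed
 * per-byte rounding average ((a + b + 1) >> 1).  op_put/op_avg are the
 * scalar counterparts used by the C *_l2 helpers further down.
 */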
88 #define op_put(a, b) a = b
89 #define op_avg(a, b) a = rnd_avg_pixel4(a, b)
90 static inline void put_pixels4_mmi(uint8_t *block, const uint8_t *pixels,
91  ptrdiff_t line_size, int h)
92 {
93  __asm__ volatile (
94  "1: \r\n"
95  "gslwlc1 $f2, 3(%[pixels]) \r\n"
96  "gslwrc1 $f2, 0(%[pixels]) \r\n"
97  "gsswlc1 $f2, 3(%[block]) \r\n"
98  "gsswrc1 $f2, 0(%[block]) \r\n"
99  "dadd %[pixels], %[pixels], %[line_size]\r\n"
100  "dadd %[block], %[block], %[line_size] \r\n"
101  "daddi %[h], %[h], -1 \r\n"
102  "bnez %[h], 1b \r\n"
103  : [block]"+&r"(block),[pixels]"+&r"(pixels)
104  : [line_size]"r"(line_size),[h]"r"(h)
105  : "$f2"
106  );
107 }
108 
109 static inline void put_pixels8_mmi(uint8_t *block, const uint8_t *pixels,
110  ptrdiff_t line_size, int h)
111 {
112  __asm__ volatile (
113  "1: \r\n"
114  "gsldlc1 $f2, 7(%[pixels]) \r\n"
115  "gsldrc1 $f2, 0(%[pixels]) \r\n"
116  "gssdlc1 $f2, 7(%[block]) \r\n"
117  "gssdrc1 $f2, 0(%[block]) \r\n"
118  "dadd %[pixels], %[pixels], %[line_size]\r\n"
119  "dadd %[block], %[block], %[line_size] \r\n"
120  "daddi %[h], %[h], -1 \r\n"
121  "bnez %[h], 1b \r\n"
122  : [block]"+&r"(block),[pixels]"+&r"(pixels)
123  : [line_size]"r"(line_size),[h]"r"(h)
124  : "$f2"
125  );
126 }
127 
128 static inline void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels,
129  ptrdiff_t line_size, int h)
130 {
131  __asm__ volatile (
132  "1: \r\n"
133  "gsldlc1 $f2, 7(%[pixels]) \r\n"
134  "gsldrc1 $f2, 0(%[pixels]) \r\n"
135  "gsldlc1 $f4, 15(%[pixels]) \r\n"
136  "gsldrc1 $f4, 8(%[pixels]) \r\n"
137  "gssdlc1 $f2, 7(%[block]) \r\n"
138  "gssdrc1 $f2, 0(%[block]) \r\n"
139  "gssdlc1 $f4, 15(%[block]) \r\n"
140  "gssdrc1 $f4, 8(%[block]) \r\n"
141  "dadd %[pixels], %[pixels], %[line_size]\r\n"
142  "dadd %[block], %[block], %[line_size] \r\n"
143  "daddi %[h], %[h], -1 \r\n"
144  "bnez %[h], 1b \r\n"
145  : [block]"+&r"(block),[pixels]"+&r"(pixels)
146  : [line_size]"r"(line_size),[h]"r"(h)
147  : "$f2","$f4"
148  );
149 }
150 
151 static inline void avg_pixels4_mmi(uint8_t *block, const uint8_t *pixels,
152  ptrdiff_t line_size, int h)
153 {
154  __asm__ volatile (
155  "1: \r\n"
156  "gslwlc1 $f2, 3(%[pixels]) \r\n"
157  "gslwrc1 $f2, 0(%[pixels]) \r\n"
158  "gslwlc1 $f4, 3(%[block]) \r\n"
159  "gslwrc1 $f4, 0(%[block]) \r\n"
160  "pavgb $f2, $f2, $f4 \r\n"
161  "gsswlc1 $f2, 3(%[block]) \r\n"
162  "gsswrc1 $f2, 0(%[block]) \r\n"
163  "dadd %[pixels], %[pixels], %[line_size]\r\n"
164  "dadd %[block], %[block], %[line_size] \r\n"
165  "daddi %[h], %[h], -1 \r\n"
166  "bnez %[h], 1b \r\n"
167  : [block]"+&r"(block),[pixels]"+&r"(pixels)
168  : [line_size]"r"(line_size),[h]"r"(h)
169  : "$f2","$f4"
170  );
171 }
172 
173 static inline void avg_pixels8_mmi(uint8_t *block, const uint8_t *pixels,
174  ptrdiff_t line_size, int h)
175 {
176  __asm__ volatile (
177  "1: \r\n"
178  "gsldlc1 $f2, 7(%[block]) \r\n"
179  "gsldrc1 $f2, 0(%[block]) \r\n"
180  "gsldlc1 $f4, 7(%[pixels]) \r\n"
181  "gsldrc1 $f4, 0(%[pixels]) \r\n"
182  "pavgb $f2, $f2, $f4 \r\n"
183  "gssdlc1 $f2, 7(%[block]) \r\n"
184  "gssdrc1 $f2, 0(%[block]) \r\n"
185  "dadd %[pixels], %[pixels], %[line_size]\r\n"
186  "dadd %[block], %[block], %[line_size] \r\n"
187  "daddi %[h], %[h], -1 \r\n"
188  "bnez %[h], 1b \r\n"
189  : [block]"+&r"(block),[pixels]"+&r"(pixels)
190  : [line_size]"r"(line_size),[h]"r"(h)
191  : "$f2","$f4"
192  );
193 }
194 
195 static inline void avg_pixels16_mmi(uint8_t *block, const uint8_t *pixels,
196  ptrdiff_t line_size, int h)
197 {
198  __asm__ volatile (
199  "1: \r\n"
200  "gsldlc1 $f2, 7(%[block]) \r\n"
201  "gsldrc1 $f2, 0(%[block]) \r\n"
202  "gsldlc1 $f4, 15(%[block]) \r\n"
203  "gsldrc1 $f4, 8(%[block]) \r\n"
204  "gsldlc1 $f6, 7(%[pixels]) \r\n"
205  "gsldrc1 $f6, 0(%[pixels]) \r\n"
206  "gsldlc1 $f8, 15(%[pixels]) \r\n"
207  "gsldrc1 $f8, 8(%[pixels]) \r\n"
208  "pavgb $f2, $f2, $f6 \r\n"
209  "pavgb $f4, $f4, $f8 \r\n"
210  "gssdlc1 $f2, 7(%[block]) \r\n"
211  "gssdrc1 $f2, 0(%[block]) \r\n"
212  "gssdlc1 $f4, 15(%[block]) \r\n"
213  "gssdrc1 $f4, 8(%[block]) \r\n"
214  "dadd %[pixels], %[pixels], %[line_size]\r\n"
215  "dadd %[block], %[block], %[line_size] \r\n"
216  "daddi %[h], %[h], -1 \r\n"
217  "bnez %[h], 1b \r\n"
218  : [block]"+&r"(block),[pixels]"+&r"(pixels)
219  : [line_size]"r"(line_size),[h]"r"(h)
220  : "$f2","$f4","$f6","$f8"
221  );
222 }
223 
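/*
 * The *_l2 helpers are plain C: they average two sources into dst four
 * pixels at a time with rnd_avg_pixel4 (per-byte rounding average).  The
 * quarter-pel MC functions use them to blend a filtered half-pel plane
 * with the source pixels or with a second filtered plane.
 */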
224 static inline void put_pixels4_l2_mmi(uint8_t *dst, const uint8_t *src1,
225  const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
226  int h)
227 {
228  int i;
229  for (i = 0; i < h; i++) {
230  pixel4 a, b;
231  a = AV_RN4P(&src1[i * src_stride1]);
232  b = AV_RN4P(&src2[i * src_stride2]);
233  op_put(*((pixel4 *) &dst[i * dst_stride]), rnd_avg_pixel4(a, b));
234  }
235 }
236 
237 static inline void put_pixels8_l2_mmi(uint8_t *dst, const uint8_t *src1,
238  const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
239  int h)
240 {
241  int i;
242  for (i = 0; i < h; i++) {
243  pixel4 a, b;
244  a = AV_RN4P(&src1[i * src_stride1]);
245  b = AV_RN4P(&src2[i * src_stride2]);
246  op_put(*((pixel4 *) &dst[i * dst_stride]), rnd_avg_pixel4(a, b));
247  a = AV_RN4P(&src1[i * src_stride1 + 4]);
248  b = AV_RN4P(&src2[i * src_stride2 + 4]);
249  op_put(*((pixel4 *) &dst[i * dst_stride + 4]), rnd_avg_pixel4(a, b));
250  }
251 }
252 
253 static inline void put_pixels16_l2_mmi(uint8_t *dst, const uint8_t *src1,
254  const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
255  int h)
256 {
257  int i;
258  for (i = 0; i < h; i++) {
259  pixel4 a, b;
260  a = AV_RN4P(&src1[i * src_stride1]);
261  b = AV_RN4P(&src2[i * src_stride2]);
262  op_put(*((pixel4 *) &dst[i * dst_stride]), rnd_avg_pixel4(a, b));
263  a = AV_RN4P(&src1[i * src_stride1 + 4]);
264  b = AV_RN4P(&src2[i * src_stride2 + 4]);
265  op_put(*((pixel4 *) &dst[i * dst_stride + 4]), rnd_avg_pixel4(a, b));
266  a = AV_RN4P(&src1[i * src_stride1 + 8]);
267  b = AV_RN4P(&src2[i * src_stride2 + 8]);
268  op_put(*((pixel4 *) &dst[i * dst_stride + 8]), rnd_avg_pixel4(a, b));
269  a = AV_RN4P(&src1[i * src_stride1 + 12]);
270  b = AV_RN4P(&src2[i * src_stride2 + 12]);
271  op_put(*((pixel4 *) &dst[i * dst_stride + 12]), rnd_avg_pixel4(a, b));
272  }
273 }
274 
275 static inline void avg_pixels4_l2_mmi(uint8_t *dst, const uint8_t *src1,
276  const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
277  int h)
278 {
279  int i;
280  for (i = 0; i < h; i++) {
281  pixel4 a, b;
282  a = AV_RN4P(&src1[i * src_stride1]);
283  b = AV_RN4P(&src2[i * src_stride2]);
284  op_avg(*((pixel4 *) &dst[i * dst_stride]), rnd_avg_pixel4(a, b));
285  }
286 }
287 
288 static inline void avg_pixels8_l2_mmi(uint8_t *dst, const uint8_t *src1,
289  const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
290  int h)
291 {
292  int i;
293  for (i = 0; i < h; i++) {
294  pixel4 a, b;
295  a = AV_RN4P(&src1[i * src_stride1]);
296  b = AV_RN4P(&src2[i * src_stride2]);
297  op_avg(*((pixel4 *) &dst[i * dst_stride]), rnd_avg_pixel4(a, b));
298  a = AV_RN4P(&src1[i * src_stride1 + 4]);
299  b = AV_RN4P(&src2[i * src_stride2 + 4]);
300  op_avg(*((pixel4 *) &dst[i * dst_stride + 4]), rnd_avg_pixel4(a, b));
301  }
302 }
303 
304 static inline void avg_pixels16_l2_mmi(uint8_t *dst, const uint8_t *src1,
305  const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
306  int h)
307 {
308  int i;
309  for (i = 0; i < h; i++) {
310  pixel4 a, b;
311  a = AV_RN4P(&src1[i * src_stride1]);
312  b = AV_RN4P(&src2[i * src_stride2]);
313  op_avg(*((pixel4 *) &dst[i * dst_stride]), rnd_avg_pixel4(a, b));
314  a = AV_RN4P(&src1[i * src_stride1 + 4]);
315  b = AV_RN4P(&src2[i * src_stride2 + 4]);
316  op_avg(*((pixel4 *) &dst[i * dst_stride + 4]), rnd_avg_pixel4(a, b));
317  a = AV_RN4P(&src1[i * src_stride1 + 8]);
318  b = AV_RN4P(&src2[i * src_stride2 + 8]);
319  op_avg(*((pixel4 *) &dst[i * dst_stride + 8]), rnd_avg_pixel4(a, b));
320  a = AV_RN4P(&src1[i * src_stride1 + 12]);
321  b = AV_RN4P(&src2[i * src_stride2 + 12]);
322  op_avg(*((pixel4 *) &dst[i * dst_stride + 12]), rnd_avg_pixel4(a, b));
323 
324  }
325 }
326 #undef op_put
327 #undef op_avg
328 
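/*
 * H.264 6-tap luma half-pel filter: out = 20*(C+D) - 5*(B+E) + (A+F) for six
 * consecutive samples A..F around the interpolation point.  The single-pass
 * h/v lowpass routines round with (out + 16) >> 5 and clip to 8 bits; the
 * two-pass hv routines keep the first pass at 16-bit precision and round
 * only once at the end, which is what op2_put/op2_avg ((b + 512) >> 10) do.
 */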
329 #define op2_avg(a, b) a = (((a)+CLIP(((b) + 512)>>10)+1)>>1)
330 #define op2_put(a, b) a = CLIP(((b) + 512)>>10)
331 static void put_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
332  int dstStride, int srcStride)
333 {
334  __asm__ volatile (
335  "xor $f0, $f0, $f0 \r\n"
336  "dli $8, 4 \r\n"
337  "1: \r\n"
338  "gslwlc1 $f2, 1(%[src]) \r\n"
339  "gslwrc1 $f2, -2(%[src]) \r\n"
340  "gslwlc1 $f4, 2(%[src]) \r\n"
341  "gslwrc1 $f4, -1(%[src]) \r\n"
342  "gslwlc1 $f6, 3(%[src]) \r\n"
343  "gslwrc1 $f6, 0(%[src]) \r\n"
344  "gslwlc1 $f8, 4(%[src]) \r\n"
345  "gslwrc1 $f8, 1(%[src]) \r\n"
346  "gslwlc1 $f10, 5(%[src]) \r\n"
347  "gslwrc1 $f10, 2(%[src]) \r\n"
348  "gslwlc1 $f12, 6(%[src]) \r\n"
349  "gslwrc1 $f12, 3(%[src]) \r\n"
350  "punpcklbh $f2, $f2, $f0 \r\n"
351  "punpcklbh $f4, $f4, $f0 \r\n"
352  "punpcklbh $f6, $f6, $f0 \r\n"
353  "punpcklbh $f8, $f8, $f0 \r\n"
354  "punpcklbh $f10, $f10, $f0 \r\n"
355  "punpcklbh $f12, $f12, $f0 \r\n"
356  "paddsh $f14, $f6, $f8 \r\n"
357  "paddsh $f16, $f4, $f10 \r\n"
358  "paddsh $f18, $f2, $f12 \r\n"
359  "pmullh $f14, $f14, %[ff_pw_20] \r\n"
360  "pmullh $f16, $f16, %[ff_pw_5] \r\n"
361  "psubsh $f14, $f14, $f16 \r\n"
362  "paddsh $f18, $f14, $f18 \r\n"
363  "paddsh $f18, $f18, %[ff_pw_16] \r\n"
364  "psrah $f18, $f18, %[ff_pw_5] \r\n"
365  "packushb $f18, $f18, $f0 \r\n"
366  "gsswlc1 $f18, 3(%[dst]) \r\n"
367  "gsswrc1 $f18, 0(%[dst]) \r\n"
368  "dadd %[dst], %[dst], %[dstStride] \r\n"
369  "dadd %[src], %[src], %[srcStride] \r\n"
370  "daddi $8, $8, -1 \r\n"
371  "bnez $8, 1b \r\n"
372  : [dst]"+&r"(dst),[src]"+&r"(src)
373  : [dstStride]"r"(dstStride),[srcStride]"r"(srcStride),
374  [ff_pw_20]"f"(ff_pw_20),[ff_pw_5]"f"(ff_pw_5),[ff_pw_16]"f"(ff_pw_16)
375  : "$8","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16",
376  "$f18"
377  );
378 }
379 
380 static void put_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
381  int dstStride, int srcStride)
382 {
383  __asm__ volatile (
384  "xor $f0, $f0, $f0 \r\n"
385  "dli $8, 8 \r\n"
386  "1: \r\n"
387  "gsldlc1 $f2, 5(%[src]) \r\n"
388  "gsldrc1 $f2, -2(%[src]) \r\n"
389  "gsldlc1 $f4, 6(%[src]) \r\n"
390  "gsldrc1 $f4, -1(%[src]) \r\n"
391  "gsldlc1 $f6, 7(%[src]) \r\n"
392  "gsldrc1 $f6, 0(%[src]) \r\n"
393  "gsldlc1 $f8, 8(%[src]) \r\n"
394  "gsldrc1 $f8, 1(%[src]) \r\n"
395  "gsldlc1 $f10, 9(%[src]) \r\n"
396  "gsldrc1 $f10, 2(%[src]) \r\n"
397  "gsldlc1 $f12, 10(%[src]) \r\n"
398  "gsldrc1 $f12, 3(%[src]) \r\n"
399  "punpcklbh $f14, $f6, $f0 \r\n"
400  "punpckhbh $f16, $f6, $f0 \r\n"
401  "punpcklbh $f18, $f8, $f0 \r\n"
402  "punpckhbh $f20, $f8, $f0 \r\n"
403  "paddsh $f6, $f14, $f18 \r\n"
404  "paddsh $f8, $f16, $f20 \r\n"
405  "pmullh $f6, $f6, %[ff_pw_20] \r\n"
406  "pmullh $f8, $f8, %[ff_pw_20] \r\n"
407  "punpcklbh $f14, $f4, $f0 \r\n"
408  "punpckhbh $f16, $f4, $f0 \r\n"
409  "punpcklbh $f18, $f10, $f0 \r\n"
410  "punpckhbh $f20, $f10, $f0 \r\n"
411  "paddsh $f4, $f14, $f18 \r\n"
412  "paddsh $f10, $f16, $f20 \r\n"
413  "pmullh $f4, $f4, %[ff_pw_5] \r\n"
414  "pmullh $f10, $f10, %[ff_pw_5] \r\n"
415  "punpcklbh $f14, $f2, $f0 \r\n"
416  "punpckhbh $f16, $f2, $f0 \r\n"
417  "punpcklbh $f18, $f12, $f0 \r\n"
418  "punpckhbh $f20, $f12, $f0 \r\n"
419  "paddsh $f2, $f14, $f18 \r\n"
420  "paddsh $f12, $f16, $f20 \r\n"
421  "psubsh $f6, $f6, $f4 \r\n"
422  "psubsh $f8, $f8, $f10 \r\n"
423  "paddsh $f6, $f6, $f2 \r\n"
424  "paddsh $f8, $f8, $f12 \r\n"
425  "paddsh $f6, $f6, %[ff_pw_16] \r\n"
426  "paddsh $f8, $f8, %[ff_pw_16] \r\n"
427  "psrah $f6, $f6, %[ff_pw_5] \r\n"
428  "psrah $f8, $f8, %[ff_pw_5] \r\n"
429  "packushb $f18, $f6, $f8 \r\n"
430  "sdc1 $f18, 0(%[dst]) \r\n"
431  "dadd %[dst], %[dst], %[dstStride] \r\n"
432  "dadd %[src], %[src], %[srcStride] \r\n"
433  "daddi $8, $8, -1 \r\n"
434  "bnez $8, 1b \r\n"
435  : [dst]"+&r"(dst),[src]"+&r"(src)
436  : [dstStride]"r"(dstStride),[srcStride]"r"(srcStride),
437  [ff_pw_20]"f"(ff_pw_20),[ff_pw_5]"f"(ff_pw_5),[ff_pw_16]"f"(ff_pw_16)
438  : "$8","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16",
439  "$f18","$f20"
440  );
441 }
442 
443 static void put_h264_qpel16_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
444  int dstStride, int srcStride)
445 {
446  put_h264_qpel8_h_lowpass_mmi(dst, src, dstStride, srcStride);
447  put_h264_qpel8_h_lowpass_mmi(dst+8, src+8, dstStride, srcStride);
448  src += 8*srcStride;
449  dst += 8*dstStride;
450  put_h264_qpel8_h_lowpass_mmi(dst, src, dstStride, srcStride);
451  put_h264_qpel8_h_lowpass_mmi(dst+8, src+8, dstStride, srcStride);
452 }
453 
454 static void avg_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
455  int dstStride, int srcStride)
456 {
457  __asm__ volatile (
458  "xor $f0, $f0, $f0 \r\n"
459  "dli $8, 4 \r\n"
460  "1: \r\n"
461  "gslwlc1 $f2, 1(%[src]) \r\n"
462  "gslwrc1 $f2, -2(%[src]) \r\n"
463  "gslwlc1 $f4, 2(%[src]) \r\n"
464  "gslwrc1 $f4, -1(%[src]) \r\n"
465  "gslwlc1 $f6, 3(%[src]) \r\n"
466  "gslwrc1 $f6, 0(%[src]) \r\n"
467  "gslwlc1 $f8, 4(%[src]) \r\n"
468  "gslwrc1 $f8, 1(%[src]) \r\n"
469  "gslwlc1 $f10, 5(%[src]) \r\n"
470  "gslwrc1 $f10, 2(%[src]) \r\n"
471  "gslwlc1 $f12, 6(%[src]) \r\n"
472  "gslwrc1 $f12, 3(%[src]) \r\n"
473  "punpcklbh $f2, $f2, $f0 \r\n"
474  "punpcklbh $f4, $f4, $f0 \r\n"
475  "punpcklbh $f6, $f6, $f0 \r\n"
476  "punpcklbh $f8, $f8, $f0 \r\n"
477  "punpcklbh $f10, $f10, $f0 \r\n"
478  "punpcklbh $f12, $f12, $f0 \r\n"
479  "paddsh $f14, $f6, $f8 \r\n"
480  "paddsh $f16, $f4, $f10 \r\n"
481  "paddsh $f18, $f2, $f12 \r\n"
482  "pmullh $f14, $f14, %[ff_pw_20] \r\n"
483  "pmullh $f16, $f16, %[ff_pw_5] \r\n"
484  "psubsh $f14, $f14, $f16 \r\n"
485  "paddsh $f18, $f14, $f18 \r\n"
486  "paddsh $f18, $f18, %[ff_pw_16] \r\n"
487  "psrah $f18, $f18, %[ff_pw_5] \r\n"
488  "packushb $f18, $f18, $f0 \r\n"
489  "lwc1 $f20, 0(%[dst]) \r\n"
490  "pavgb $f18, $f18, $f20 \r\n"
491  "gsswlc1 $f18, 3(%[dst]) \r\n"
492  "gsswrc1 $f18, 0(%[dst]) \r\n"
493  "dadd %[dst], %[dst], %[dstStride] \r\n"
494  "dadd %[src], %[src], %[srcStride] \r\n"
495  "daddi $8, $8, -1 \r\n"
496  "bnez $8, 1b \r\n"
497  : [dst]"+&r"(dst),[src]"+&r"(src)
498  : [dstStride]"r"(dstStride),[srcStride]"r"(srcStride),
499  [ff_pw_20]"f"(ff_pw_20),[ff_pw_5]"f"(ff_pw_5),[ff_pw_16]"f"(ff_pw_16)
500  : "$8","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16",
501  "$f18","$f20"
502  );
503 }
504 
505 static void avg_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
506  int dstStride, int srcStride)
507 {
508  __asm__ volatile (
509  "xor $f0, $f0, $f0 \r\n"
510  "dli $8, 8 \r\n"
511  "1: \r\n"
512  "gsldlc1 $f2, 5(%[src]) \r\n"
513  "gsldrc1 $f2, -2(%[src]) \r\n"
514  "gsldlc1 $f4, 6(%[src]) \r\n"
515  "gsldrc1 $f4, -1(%[src]) \r\n"
516  "gsldlc1 $f6, 7(%[src]) \r\n"
517  "gsldrc1 $f6, 0(%[src]) \r\n"
518  "gsldlc1 $f8, 8(%[src]) \r\n"
519  "gsldrc1 $f8, 1(%[src]) \r\n"
520  "gsldlc1 $f10, 9(%[src]) \r\n"
521  "gsldrc1 $f10, 2(%[src]) \r\n"
522  "gsldlc1 $f12, 10(%[src]) \r\n"
523  "gsldrc1 $f12, 3(%[src]) \r\n"
524  "punpcklbh $f14, $f6, $f0 \r\n"
525  "punpckhbh $f16, $f6, $f0 \r\n"
526  "punpcklbh $f18, $f8, $f0 \r\n"
527  "punpckhbh $f20, $f8, $f0 \r\n"
528  "paddsh $f6, $f14, $f18 \r\n"
529  "paddsh $f8, $f16, $f20 \r\n"
530  "pmullh $f6, $f6, %[ff_pw_20] \r\n"
531  "pmullh $f8, $f8, %[ff_pw_20] \r\n"
532  "punpcklbh $f14, $f4, $f0 \r\n"
533  "punpckhbh $f16, $f4, $f0 \r\n"
534  "punpcklbh $f18, $f10, $f0 \r\n"
535  "punpckhbh $f20, $f10, $f0 \r\n"
536  "paddsh $f4, $f14, $f18 \r\n"
537  "paddsh $f10, $f16, $f20 \r\n"
538  "pmullh $f4, $f4, %[ff_pw_5] \r\n"
539  "pmullh $f10, $f10, %[ff_pw_5] \r\n"
540  "punpcklbh $f14, $f2, $f0 \r\n"
541  "punpckhbh $f16, $f2, $f0 \r\n"
542  "punpcklbh $f18, $f12, $f0 \r\n"
543  "punpckhbh $f20, $f12, $f0 \r\n"
544  "paddsh $f2, $f14, $f18 \r\n"
545  "paddsh $f12, $f16, $f20 \r\n"
546  "psubsh $f6, $f6, $f4 \r\n"
547  "psubsh $f8, $f8, $f10 \r\n"
548  "paddsh $f6, $f6, $f2 \r\n"
549  "paddsh $f8, $f8, $f12 \r\n"
550  "paddsh $f6, $f6, %[ff_pw_16] \r\n"
551  "paddsh $f8, $f8, %[ff_pw_16] \r\n"
552  "psrah $f6, $f6, %[ff_pw_5] \r\n"
553  "psrah $f8, $f8, %[ff_pw_5] \r\n"
554  "packushb $f18, $f6, $f8 \r\n"
555  "ldc1 $f20, 0(%[dst]) \r\n"
556  "pavgb $f18, $f18, $f20 \r\n"
557  "sdc1 $f18, 0(%[dst]) \r\n"
558  "dadd %[dst], %[dst], %[dstStride] \r\n"
559  "dadd %[src], %[src], %[srcStride] \r\n"
560  "daddi $8, $8, -1 \r\n"
561  "bnez $8, 1b \r\n"
562  : [dst]"+&r"(dst),[src]"+&r"(src)
563  : [dstStride]"r"(dstStride),[srcStride]"r"(srcStride),
564  [ff_pw_20]"f"(ff_pw_20),[ff_pw_5]"f"(ff_pw_5),[ff_pw_16]"f"(ff_pw_16)
565  : "$8","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16",
566  "$f18","$f20"
567  );
568 }
569 
570 static void avg_h264_qpel16_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
571  int dstStride, int srcStride)
572 {
573  avg_h264_qpel8_h_lowpass_mmi(dst, src, dstStride, srcStride);
574  avg_h264_qpel8_h_lowpass_mmi(dst+8, src+8, dstStride, srcStride);
575  src += 8*srcStride;
576  dst += 8*dstStride;
577  avg_h264_qpel8_h_lowpass_mmi(dst, src, dstStride, srcStride);
578  avg_h264_qpel8_h_lowpass_mmi(dst+8, src+8, dstStride, srcStride);
579 }
580 
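/*
 * Vertical lowpass: the same 6-tap filter applied down each column.  The
 * rows src-2*srcStride .. src+(N+2)*srcStride (9 rows for N=4, 13 for N=8)
 * are bound to separate operands ([srcB], [srcA], [src0], ...) and all N
 * output rows are produced in one fully unrolled asm block, reusing the
 * unpacked rows between outputs.
 */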
581 static void put_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
582  int dstStride, int srcStride)
583 {
584  __asm__ volatile (
585  "xor $f0, $f0, $f0 \r\n"
586  "gslwlc1 $f2, 3(%[srcB]) \r\n"
587  "gslwrc1 $f2, 0(%[srcB]) \r\n"
588  "gslwlc1 $f4, 3(%[srcA]) \r\n"
589  "gslwrc1 $f4, 0(%[srcA]) \r\n"
590  "gslwlc1 $f6, 3(%[src0]) \r\n"
591  "gslwrc1 $f6, 0(%[src0]) \r\n"
592  "gslwlc1 $f8, 3(%[src1]) \r\n"
593  "gslwrc1 $f8, 0(%[src1]) \r\n"
594  "gslwlc1 $f10, 3(%[src2]) \r\n"
595  "gslwrc1 $f10, 0(%[src2]) \r\n"
596  "gslwlc1 $f12, 3(%[src3]) \r\n"
597  "gslwrc1 $f12, 0(%[src3]) \r\n"
598  "gslwlc1 $f14, 3(%[src4]) \r\n"
599  "gslwrc1 $f14, 0(%[src4]) \r\n"
600  "gslwlc1 $f16, 3(%[src5]) \r\n"
601  "gslwrc1 $f16, 0(%[src5]) \r\n"
602  "gslwlc1 $f18, 3(%[src6]) \r\n"
603  "gslwrc1 $f18, 0(%[src6]) \r\n"
604  "punpcklbh $f2, $f2, $f0 \r\n"
605  "punpcklbh $f4, $f4, $f0 \r\n"
606  "punpcklbh $f6, $f6, $f0 \r\n"
607  "punpcklbh $f8, $f8, $f0 \r\n"
608  "punpcklbh $f10, $f10, $f0 \r\n"
609  "punpcklbh $f12, $f12, $f0 \r\n"
610  "punpcklbh $f14, $f14, $f0 \r\n"
611  "punpcklbh $f16, $f16, $f0 \r\n"
612  "punpcklbh $f18, $f18, $f0 \r\n"
613  "paddsh $f20, $f6, $f8 \r\n"
614  "pmullh $f20, $f20, %[ff_pw_20] \r\n"
615  "paddsh $f22, $f4, $f10 \r\n"
616  "pmullh $f22, $f22, %[ff_pw_5] \r\n"
617  "psubsh $f24, $f20, $f22 \r\n"
618  "paddsh $f24, $f24, $f2 \r\n"
619  "paddsh $f24, $f24, $f12 \r\n"
620  "paddsh $f20, $f8, $f10 \r\n"
621  "pmullh $f20, $f20, %[ff_pw_20] \r\n"
622  "paddsh $f22, $f6, $f12 \r\n"
623  "pmullh $f22, $f22, %[ff_pw_5] \r\n"
624  "psubsh $f26, $f20, $f22 \r\n"
625  "paddsh $f26, $f26, $f4 \r\n"
626  "paddsh $f26, $f26, $f14 \r\n"
627  "paddsh $f20, $f10, $f12 \r\n"
628  "pmullh $f20, $f20, %[ff_pw_20] \r\n"
629  "paddsh $f22, $f8, $f14 \r\n"
630  "pmullh $f22, $f22, %[ff_pw_5] \r\n"
631  "psubsh $f28, $f20, $f22 \r\n"
632  "paddsh $f28, $f28, $f6 \r\n"
633  "paddsh $f28, $f28, $f16 \r\n"
634  "paddsh $f20, $f12, $f14 \r\n"
635  "pmullh $f20, $f20, %[ff_pw_20] \r\n"
636  "paddsh $f22, $f10, $f16 \r\n"
637  "pmullh $f22, $f22, %[ff_pw_5] \r\n"
638  "psubsh $f30, $f20, $f22 \r\n"
639  "paddsh $f30, $f30, $f8 \r\n"
640  "paddsh $f30, $f30, $f18 \r\n"
641  "paddsh $f24, $f24, %[ff_pw_16] \r\n"
642  "paddsh $f26, $f26, %[ff_pw_16] \r\n"
643  "paddsh $f28, $f28, %[ff_pw_16] \r\n"
644  "paddsh $f30, $f30, %[ff_pw_16] \r\n"
645  "psrah $f24, $f24, %[ff_pw_5] \r\n"
646  "psrah $f26, $f26, %[ff_pw_5] \r\n"
647  "psrah $f28, $f28, %[ff_pw_5] \r\n"
648  "psrah $f30, $f30, %[ff_pw_5] \r\n"
649  "packushb $f24, $f24, $f0 \r\n"
650  "packushb $f26, $f26, $f0 \r\n"
651  "packushb $f28, $f28, $f0 \r\n"
652  "packushb $f30, $f30, $f0 \r\n"
653  "swc1 $f24, 0(%[dst0]) \r\n"
654  "swc1 $f26, 0(%[dst1]) \r\n"
655  "swc1 $f28, 0(%[dst2]) \r\n"
656  "swc1 $f30, 0(%[dst3]) \r\n"
657  ::[dst0]"r"(dst), [dst1]"r"(dst+dstStride),
658  [dst2]"r"(dst+2*dstStride), [dst3]"r"(dst+3*dstStride),
659  [srcB]"r"(src-2*srcStride), [srcA]"r"(src-srcStride),
660  [src0]"r"(src), [src1]"r"(src+srcStride),
661  [src2]"r"(src+2*srcStride), [src3]"r"(src+3*srcStride),
662  [src4]"r"(src+4*srcStride), [src5]"r"(src+5*srcStride),
663  [src6]"r"(src+6*srcStride), [ff_pw_20]"f"(ff_pw_20),
664  [ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
665  : "$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16","$f18",
666  "$f20","$f22","$f24","$f26","$f28","$f30"
667  );
668 }
669 
670 static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
671  int dstStride, int srcStride)
672 {
673  __asm__ volatile (
674  "xor $f0, $f0, $f0 \r\n"
675  "gsldlc1 $f2, 7(%[srcB]) \r\n"
676  "gsldrc1 $f2, 0(%[srcB]) \r\n"
677  "gsldlc1 $f4, 7(%[srcA]) \r\n"
678  "gsldrc1 $f4, 0(%[srcA]) \r\n"
679  "gsldlc1 $f6, 7(%[src0]) \r\n"
680  "gsldrc1 $f6, 0(%[src0]) \r\n"
681  "gsldlc1 $f8, 7(%[src1]) \r\n"
682  "gsldrc1 $f8, 0(%[src1]) \r\n"
683  "gsldlc1 $f10, 7(%[src2]) \r\n"
684  "gsldrc1 $f10, 0(%[src2]) \r\n"
685  "gsldlc1 $f12, 7(%[src3]) \r\n"
686  "gsldrc1 $f12, 0(%[src3]) \r\n"
687  "gsldlc1 $f14, 7(%[src4]) \r\n"
688  "gsldrc1 $f14, 0(%[src4]) \r\n"
689  "gsldlc1 $f16, 7(%[src5]) \r\n"
690  "gsldrc1 $f16, 0(%[src5]) \r\n"
691  "gsldlc1 $f18, 7(%[src6]) \r\n"
692  "gsldrc1 $f18, 0(%[src6]) \r\n"
693  "gsldlc1 $f20, 7(%[src7]) \r\n"
694  "gsldrc1 $f20, 0(%[src7]) \r\n"
695  "gsldlc1 $f22, 7(%[src8]) \r\n"
696  "gsldrc1 $f22, 0(%[src8]) \r\n"
697  "gsldlc1 $f24, 7(%[src9]) \r\n"
698  "gsldrc1 $f24, 0(%[src9]) \r\n"
699  "gsldlc1 $f26, 7(%[src10]) \r\n"
700  "gsldrc1 $f26, 0(%[src10]) \r\n"
701  "punpcklbh $f1, $f2, $f0 \r\n"
702  "punpckhbh $f2, $f2, $f0 \r\n"
703  "punpcklbh $f3, $f4, $f0 \r\n"
704  "punpckhbh $f4, $f4, $f0 \r\n"
705  "punpcklbh $f5, $f6, $f0 \r\n"
706  "punpckhbh $f6, $f6, $f0 \r\n"
707  "punpcklbh $f7, $f8, $f0 \r\n"
708  "punpckhbh $f8, $f8, $f0 \r\n"
709  "punpcklbh $f9, $f10, $f0 \r\n"
710  "punpckhbh $f10, $f10, $f0 \r\n"
711  "punpcklbh $f11, $f12, $f0 \r\n"
712  "punpckhbh $f12, $f12, $f0 \r\n"
713  "punpcklbh $f13, $f14, $f0 \r\n"
714  "punpckhbh $f14, $f14, $f0 \r\n"
715  "punpcklbh $f15, $f16, $f0 \r\n"
716  "punpckhbh $f16, $f16, $f0 \r\n"
717  "punpcklbh $f17, $f18, $f0 \r\n"
718  "punpckhbh $f18, $f18, $f0 \r\n"
719  "punpcklbh $f19, $f20, $f0 \r\n"
720  "punpckhbh $f20, $f20, $f0 \r\n"
721  "punpcklbh $f21, $f22, $f0 \r\n"
722  "punpckhbh $f22, $f22, $f0 \r\n"
723  "punpcklbh $f23, $f24, $f0 \r\n"
724  "punpckhbh $f24, $f24, $f0 \r\n"
725  "punpcklbh $f25, $f26, $f0 \r\n"
726  "punpckhbh $f26, $f26, $f0 \r\n"
727  "paddsh $f27, $f5, $f7 \r\n"
728  "pmullh $f27, $f27, %[ff_pw_4] \r\n"
729  "paddsh $f28, $f6, $f8 \r\n"//src0+src1
730  "pmullh $f28, $f28, %[ff_pw_4] \r\n"
731  "psubsh $f27, $f27, $f3 \r\n"
732  "psubsh $f28, $f28, $f4 \r\n"
733  "psubsh $f27, $f27, $f9 \r\n"
734  "psubsh $f28, $f28, $f10 \r\n"
735  "pmullh $f27, $f27, %[ff_pw_5] \r\n"
736  "pmullh $f28, $f28, %[ff_pw_5] \r\n"
737  "paddsh $f27, $f27, $f1 \r\n"
738  "paddsh $f28, $f28, $f2 \r\n"
739  "paddsh $f27, $f27, $f11 \r\n"
740  "paddsh $f28, $f28, $f12 \r\n"
741  "paddsh $f27, $f27, %[ff_pw_16] \r\n"
742  "paddsh $f28, $f28, %[ff_pw_16] \r\n"
743  "psrah $f27, $f27, %[ff_pw_5] \r\n"
744  "psrah $f28, $f28, %[ff_pw_5] \r\n"
745  "packushb $f27, $f27, $f0 \r\n"
746  "packushb $f28, $f28, $f0 \r\n"
747  "punpcklwd $f2, $f27, $f28 \r\n"
748  "sdc1 $f2, 0(%[dst0]) \r\n"
749  "paddsh $f27, $f7, $f9 \r\n"
750  "pmullh $f27, $f27, %[ff_pw_4] \r\n"
751  "paddsh $f28, $f8, $f10 \r\n"//src1+src2
752  "pmullh $f28, $f28, %[ff_pw_4] \r\n"
753  "psubsh $f27, $f27, $f5 \r\n"
754  "psubsh $f28, $f28, $f6 \r\n"
755  "psubsh $f27, $f27, $f11 \r\n"
756  "psubsh $f28, $f28, $f12 \r\n"
757  "pmullh $f27, $f27, %[ff_pw_5] \r\n"
758  "pmullh $f28, $f28, %[ff_pw_5] \r\n"
759  "paddsh $f27, $f27, $f3 \r\n"
760  "paddsh $f28, $f28, $f4 \r\n"
761  "paddsh $f27, $f27, $f13 \r\n"
762  "paddsh $f28, $f28, $f14 \r\n"
763  "paddsh $f27, $f27, %[ff_pw_16] \r\n"
764  "paddsh $f28, $f28, %[ff_pw_16] \r\n"
765  "psrah $f27, $f27, %[ff_pw_5] \r\n"
766  "psrah $f28, $f28, %[ff_pw_5] \r\n"
767  "packushb $f27, $f27, $f0 \r\n"
768  "packushb $f28, $f28, $f0 \r\n"
769  "punpcklwd $f4, $f27, $f28 \r\n"
770  "sdc1 $f4, 0(%[dst1]) \r\n"
771  "paddsh $f27, $f9, $f11 \r\n"
772  "pmullh $f27, $f27, %[ff_pw_4] \r\n"
773  "paddsh $f28, $f10, $f12 \r\n"//src2+src3
774  "pmullh $f28, $f28, %[ff_pw_4] \r\n"
775  "psubsh $f27, $f27, $f7 \r\n"
776  "psubsh $f28, $f28, $f8 \r\n"
777  "psubsh $f27, $f27, $f13 \r\n"
778  "psubsh $f28, $f28, $f14 \r\n"
779  "pmullh $f27, $f27, %[ff_pw_5] \r\n"
780  "pmullh $f28, $f28, %[ff_pw_5] \r\n"
781  "paddsh $f27, $f27, $f5 \r\n"
782  "paddsh $f28, $f28, $f6 \r\n"
783  "paddsh $f27, $f27, $f15 \r\n"
784  "paddsh $f28, $f28, $f16 \r\n"
785  "paddsh $f27, $f27, %[ff_pw_16] \r\n"
786  "paddsh $f28, $f28, %[ff_pw_16] \r\n"
787  "psrah $f27, $f27, %[ff_pw_5] \r\n"
788  "psrah $f28, $f28, %[ff_pw_5] \r\n"
789  "packushb $f27, $f27, $f0 \r\n"
790  "packushb $f28, $f28, $f0 \r\n"
791  "punpcklwd $f6, $f27, $f28 \r\n"
792  "sdc1 $f6, 0(%[dst2]) \r\n"
793  "paddsh $f27, $f11, $f13 \r\n"
794  "pmullh $f27, $f27, %[ff_pw_4] \r\n"
795  "paddsh $f28, $f12, $f14 \r\n"//src3+src4
796  "pmullh $f28, $f28, %[ff_pw_4] \r\n"
797  "psubsh $f27, $f27, $f9 \r\n"
798  "psubsh $f28, $f28, $f10 \r\n"
799  "psubsh $f27, $f27, $f15 \r\n"
800  "psubsh $f28, $f28, $f16 \r\n"
801  "pmullh $f27, $f27, %[ff_pw_5] \r\n"
802  "pmullh $f28, $f28, %[ff_pw_5] \r\n"
803  "paddsh $f27, $f27, $f7 \r\n"
804  "paddsh $f28, $f28, $f8 \r\n"
805  "paddsh $f27, $f27, $f17 \r\n"
806  "paddsh $f28, $f28, $f18 \r\n"
807  "paddsh $f27, $f27, %[ff_pw_16] \r\n"
808  "paddsh $f28, $f28, %[ff_pw_16] \r\n"
809  "psrah $f27, $f27, %[ff_pw_5] \r\n"
810  "psrah $f28, $f28, %[ff_pw_5] \r\n"
811  "packushb $f27, $f27, $f0 \r\n"
812  "packushb $f28, $f28, $f0 \r\n"
813  "punpcklwd $f8, $f27, $f28 \r\n"
814  "sdc1 $f8, 0(%[dst3]) \r\n"
815  "paddsh $f27, $f13, $f15 \r\n"
816  "pmullh $f27, $f27, %[ff_pw_4] \r\n"
817  "paddsh $f28, $f14, $f16 \r\n"//src4+src5
818  "pmullh $f28, $f28, %[ff_pw_4] \r\n"
819  "psubsh $f27, $f27, $f11 \r\n"
820  "psubsh $f28, $f28, $f12 \r\n"
821  "psubsh $f27, $f27, $f17 \r\n"
822  "psubsh $f28, $f28, $f18 \r\n"
823  "pmullh $f27, $f27, %[ff_pw_5] \r\n"
824  "pmullh $f28, $f28, %[ff_pw_5] \r\n"
825  "paddsh $f27, $f27, $f9 \r\n"
826  "paddsh $f28, $f28, $f10 \r\n"
827  "paddsh $f27, $f27, $f19 \r\n"
828  "paddsh $f28, $f28, $f20 \r\n"
829  "paddsh $f27, $f27, %[ff_pw_16] \r\n"
830  "paddsh $f28, $f28, %[ff_pw_16] \r\n"
831  "psrah $f27, $f27, %[ff_pw_5] \r\n"
832  "psrah $f28, $f28, %[ff_pw_5] \r\n"
833  "packushb $f27, $f27, $f0 \r\n"
834  "packushb $f28, $f28, $f0 \r\n"
835  "punpcklwd $f10, $f27, $f28 \r\n"
836  "sdc1 $f10, 0(%[dst4]) \r\n"
837 
838  "paddsh $f27, $f15, $f17 \r\n"
839  "pmullh $f27, $f27, %[ff_pw_4] \r\n"
840  "paddsh $f28, $f16, $f18 \r\n"//src5+src6
841  "pmullh $f28, $f28, %[ff_pw_4] \r\n"
842  "psubsh $f27, $f27, $f13 \r\n"
843  "psubsh $f28, $f28, $f14 \r\n"
844  "psubsh $f27, $f27, $f19 \r\n"
845  "psubsh $f28, $f28, $f20 \r\n"
846  "pmullh $f27, $f27, %[ff_pw_5] \r\n"
847  "pmullh $f28, $f28, %[ff_pw_5] \r\n"
848  "paddsh $f27, $f27, $f11 \r\n"
849  "paddsh $f28, $f28, $f12 \r\n"
850  "paddsh $f27, $f27, $f21 \r\n"
851  "paddsh $f28, $f28, $f22 \r\n"
852  "paddsh $f27, $f27, %[ff_pw_16] \r\n"
853  "paddsh $f28, $f28, %[ff_pw_16] \r\n"
854  "psrah $f27, $f27, %[ff_pw_5] \r\n"
855  "psrah $f28, $f28, %[ff_pw_5] \r\n"
856  "packushb $f27, $f27, $f0 \r\n"
857  "packushb $f28, $f28, $f0 \r\n"
858  "punpcklwd $f12, $f27, $f28 \r\n"
859  "sdc1 $f12, 0(%[dst5]) \r\n"
860  "paddsh $f27, $f17, $f19 \r\n"
861  "pmullh $f27, $f27, %[ff_pw_4] \r\n"
862  "paddsh $f28, $f18, $f20 \r\n"//src6+src7
863  "pmullh $f28, $f28, %[ff_pw_4] \r\n"
864  "psubsh $f27, $f27, $f15 \r\n"
865  "psubsh $f28, $f28, $f16 \r\n"
866  "psubsh $f27, $f27, $f21 \r\n"
867  "psubsh $f28, $f28, $f22 \r\n"
868  "pmullh $f27, $f27, %[ff_pw_5] \r\n"
869  "pmullh $f28, $f28, %[ff_pw_5] \r\n"
870  "paddsh $f27, $f27, $f13 \r\n"
871  "paddsh $f28, $f28, $f14 \r\n"
872  "paddsh $f27, $f27, $f23 \r\n"
873  "paddsh $f28, $f28, $f24 \r\n"
874  "paddsh $f27, $f27, %[ff_pw_16] \r\n"
875  "paddsh $f28, $f28, %[ff_pw_16] \r\n"
876  "psrah $f27, $f27, %[ff_pw_5] \r\n"
877  "psrah $f28, $f28, %[ff_pw_5] \r\n"
878  "packushb $f27, $f27, $f0 \r\n"
879  "packushb $f28, $f28, $f0 \r\n"
880  "punpcklwd $f14, $f27, $f28 \r\n"
881  "sdc1 $f14, 0(%[dst6]) \r\n"
882  "paddsh $f27, $f19, $f21 \r\n"
883  "pmullh $f27, $f27, %[ff_pw_4] \r\n"
884  "paddsh $f28, $f20, $f22 \r\n"//src7+src8
885  "pmullh $f28, $f28, %[ff_pw_4] \r\n"
886  "psubsh $f27, $f27, $f17 \r\n"
887  "psubsh $f28, $f28, $f18 \r\n"
888  "psubsh $f27, $f27, $f23 \r\n"
889  "psubsh $f28, $f28, $f24 \r\n"
890  "pmullh $f27, $f27, %[ff_pw_5] \r\n"
891  "pmullh $f28, $f28, %[ff_pw_5] \r\n"
892  "paddsh $f27, $f27, $f15 \r\n"
893  "paddsh $f28, $f28, $f16 \r\n"
894  "paddsh $f27, $f27, $f25 \r\n"
895  "paddsh $f28, $f28, $f26 \r\n"
896  "paddsh $f27, $f27, %[ff_pw_16] \r\n"
897  "paddsh $f28, $f28, %[ff_pw_16] \r\n"
898  "psrah $f27, $f27, %[ff_pw_5] \r\n"
899  "psrah $f28, $f28, %[ff_pw_5] \r\n"
900  "packushb $f27, $f27, $f0 \r\n"
901  "packushb $f28, $f28, $f0 \r\n"
902  "punpcklwd $f16, $f27, $f28 \r\n"
903  "sdc1 $f16, 0(%[dst7]) \r\n"
904  ::[dst0]"r"(dst), [dst1]"r"(dst+dstStride),
905  [dst2]"r"(dst+2*dstStride), [dst3]"r"(dst+3*dstStride),
906  [dst4]"r"(dst+4*dstStride), [dst5]"r"(dst+5*dstStride),
907  [dst6]"r"(dst+6*dstStride), [dst7]"r"(dst+7*dstStride),
908  [srcB]"r"(src-2*srcStride), [srcA]"r"(src-srcStride),
909  [src0]"r"(src), [src1]"r"(src+srcStride),
910  [src2]"r"(src+2*srcStride), [src3]"r"(src+3*srcStride),
911  [src4]"r"(src+4*srcStride), [src5]"r"(src+5*srcStride),
912  [src6]"r"(src+6*srcStride), [src7]"r"(src+7*srcStride),
913  [src8]"r"(src+8*srcStride), [src9]"r"(src+9*srcStride),
914  [src10]"r"(src+10*srcStride), [ff_pw_4]"f"(ff_pw_4),
915  [ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
916  : "$f0","$f1","$f2","$f3","$f4","$f5","$f6","$f7","$f8","$f9","$f10",
917  "$f11","$f12","$f13","$f14","$f15","$f16","$f17","$f18","$f19",
918  "$f20","$f21","$f22","$f23","$f24","$f25","$f26","$f27","$f28"
919  );
920 }
921 
922 static void put_h264_qpel16_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
923  int dstStride, int srcStride)
924 {
925  put_h264_qpel8_v_lowpass_mmi(dst, src, dstStride, srcStride);
926  put_h264_qpel8_v_lowpass_mmi(dst+8, src+8, dstStride, srcStride);
927  src += 8*srcStride;
928  dst += 8*dstStride;
929  put_h264_qpel8_v_lowpass_mmi(dst, src, dstStride, srcStride);
930  put_h264_qpel8_v_lowpass_mmi(dst+8, src+8, dstStride, srcStride);
931 }
932 
933 static void avg_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
934  int dstStride, int srcStride)
935 {
936  __asm__ volatile (
937  "xor $f0, $f0, $f0 \r\n"
938  "gslwlc1 $f2, 3(%[srcB]) \r\n"
939  "gslwrc1 $f2, 0(%[srcB]) \r\n"
940  "gslwlc1 $f4, 3(%[srcA]) \r\n"
941  "gslwrc1 $f4, 0(%[srcA]) \r\n"
942  "gslwlc1 $f6, 3(%[src0]) \r\n"
943  "gslwrc1 $f6, 0(%[src0]) \r\n"
944  "gslwlc1 $f8, 3(%[src1]) \r\n"
945  "gslwrc1 $f8, 0(%[src1]) \r\n"
946  "gslwlc1 $f10, 3(%[src2]) \r\n"
947  "gslwrc1 $f10, 0(%[src2]) \r\n"
948  "gslwlc1 $f12, 3(%[src3]) \r\n"
949  "gslwrc1 $f12, 0(%[src3]) \r\n"
950  "gslwlc1 $f14, 3(%[src4]) \r\n"
951  "gslwrc1 $f14, 0(%[src4]) \r\n"
952  "gslwlc1 $f16, 3(%[src5]) \r\n"
953  "gslwrc1 $f16, 0(%[src5]) \r\n"
954  "gslwlc1 $f18, 3(%[src6]) \r\n"
955  "gslwrc1 $f18, 0(%[src6]) \r\n"
956  "punpcklbh $f2, $f2, $f0 \r\n"
957  "punpcklbh $f4, $f4, $f0 \r\n"
958  "punpcklbh $f6, $f6, $f0 \r\n"
959  "punpcklbh $f8, $f8, $f0 \r\n"
960  "punpcklbh $f10, $f10, $f0 \r\n"
961  "punpcklbh $f12, $f12, $f0 \r\n"
962  "punpcklbh $f14, $f14, $f0 \r\n"
963  "punpcklbh $f16, $f16, $f0 \r\n"
964  "punpcklbh $f18, $f18, $f0 \r\n"
965  "paddsh $f20, $f6, $f8 \r\n"
966  "pmullh $f20, $f20, %[ff_pw_20] \r\n"
967  "paddsh $f22, $f4, $f10 \r\n"
968  "pmullh $f22, $f22, %[ff_pw_5] \r\n"
969  "psubsh $f24, $f20, $f22 \r\n"
970  "paddsh $f24, $f24, $f2 \r\n"
971  "paddsh $f24, $f24, $f12 \r\n"
972  "paddsh $f20, $f8, $f10 \r\n"
973  "pmullh $f20, $f20, %[ff_pw_20] \r\n"
974  "paddsh $f22, $f6, $f12 \r\n"
975  "pmullh $f22, $f22, %[ff_pw_5] \r\n"
976  "psubsh $f26, $f20, $f22 \r\n"
977  "paddsh $f26, $f26, $f4 \r\n"
978  "paddsh $f26, $f26, $f14 \r\n"
979  "paddsh $f20, $f10, $f12 \r\n"
980  "pmullh $f20, $f20, %[ff_pw_20] \r\n"
981  "paddsh $f22, $f8, $f14 \r\n"
982  "pmullh $f22, $f22, %[ff_pw_5] \r\n"
983  "psubsh $f28, $f20, $f22 \r\n"
984  "paddsh $f28, $f28, $f6 \r\n"
985  "paddsh $f28, $f28, $f16 \r\n"
986  "paddsh $f20, $f12, $f14 \r\n"
987  "pmullh $f20, $f20, %[ff_pw_20] \r\n"
988  "paddsh $f22, $f10, $f16 \r\n"
989  "pmullh $f22, $f22, %[ff_pw_5] \r\n"
990  "psubsh $f30, $f20, $f22 \r\n"
991  "paddsh $f30, $f30, $f8 \r\n"
992  "paddsh $f30, $f30, $f18 \r\n"
993  "paddsh $f24, $f24, %[ff_pw_16] \r\n"
994  "paddsh $f26, $f26, %[ff_pw_16] \r\n"
995  "paddsh $f28, $f28, %[ff_pw_16] \r\n"
996  "paddsh $f30, $f30, %[ff_pw_16] \r\n"
997  "psrah $f24, $f24, %[ff_pw_5] \r\n"
998  "psrah $f26, $f26, %[ff_pw_5] \r\n"
999  "psrah $f28, $f28, %[ff_pw_5] \r\n"
1000  "psrah $f30, $f30, %[ff_pw_5] \r\n"
1001  "packushb $f24, $f24, $f0 \r\n"
1002  "packushb $f26, $f26, $f0 \r\n"
1003  "packushb $f28, $f28, $f0 \r\n"
1004  "packushb $f30, $f30, $f0 \r\n"
1005  "lwc1 $f2, 0(%[dst0]) \r\n"
1006  "lwc1 $f4, 0(%[dst1]) \r\n"
1007  "lwc1 $f6, 0(%[dst2]) \r\n"
1008  "lwc1 $f8, 0(%[dst3]) \r\n"
1009  "pavgb $f24, $f2, $f24 \r\n"
1010  "pavgb $f26, $f4, $f26 \r\n"
1011  "pavgb $f28, $f6, $f28 \r\n"
1012  "pavgb $f30, $f8, $f30 \r\n"
1013  "swc1 $f24, 0(%[dst0]) \r\n"
1014  "swc1 $f26, 0(%[dst1]) \r\n"
1015  "swc1 $f28, 0(%[dst2]) \r\n"
1016  "swc1 $f30, 0(%[dst3]) \r\n"
1017  ::[dst0]"r"(dst), [dst1]"r"(dst+dstStride),
1018  [dst2]"r"(dst+2*dstStride), [dst3]"r"(dst+3*dstStride),
1019  [srcB]"r"(src-2*srcStride), [srcA]"r"(src-srcStride),
1020  [src0]"r"(src), [src1]"r"(src+srcStride),
1021  [src2]"r"(src+2*srcStride), [src3]"r"(src+3*srcStride),
1022  [src4]"r"(src+4*srcStride), [src5]"r"(src+5*srcStride),
1023  [src6]"r"(src+6*srcStride), [ff_pw_20]"f"(ff_pw_20),
1024  [ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
1025  : "$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16","$f18",
1026  "$f20","$f22","$f24","$f26","$f28","$f30"
1027  );
1028 }
1029 
1030 static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
1031  int dstStride, int srcStride)
1032 {
1033  __asm__ volatile (
1034  "xor $f0, $f0, $f0 \r\n"
1035  "gsldlc1 $f2, 7(%[srcB]) \r\n"
1036  "gsldrc1 $f2, 0(%[srcB]) \r\n"
1037  "gsldlc1 $f4, 7(%[srcA]) \r\n"
1038  "gsldrc1 $f4, 0(%[srcA]) \r\n"
1039  "gsldlc1 $f6, 7(%[src0]) \r\n"
1040  "gsldrc1 $f6, 0(%[src0]) \r\n"
1041  "gsldlc1 $f8, 7(%[src1]) \r\n"
1042  "gsldrc1 $f8, 0(%[src1]) \r\n"
1043  "gsldlc1 $f10, 7(%[src2]) \r\n"
1044  "gsldrc1 $f10, 0(%[src2]) \r\n"
1045  "gsldlc1 $f12, 7(%[src3]) \r\n"
1046  "gsldrc1 $f12, 0(%[src3]) \r\n"
1047  "gsldlc1 $f14, 7(%[src4]) \r\n"
1048  "gsldrc1 $f14, 0(%[src4]) \r\n"
1049  "gsldlc1 $f16, 7(%[src5]) \r\n"
1050  "gsldrc1 $f16, 0(%[src5]) \r\n"
1051  "gsldlc1 $f18, 7(%[src6]) \r\n"
1052  "gsldrc1 $f18, 0(%[src6]) \r\n"
1053  "gsldlc1 $f20, 7(%[src7]) \r\n"
1054  "gsldrc1 $f20, 0(%[src7]) \r\n"
1055  "gsldlc1 $f22, 7(%[src8]) \r\n"
1056  "gsldrc1 $f22, 0(%[src8]) \r\n"
1057  "gsldlc1 $f24, 7(%[src9]) \r\n"
1058  "gsldrc1 $f24, 0(%[src9]) \r\n"
1059  "gsldlc1 $f26, 7(%[src10]) \r\n"
1060  "gsldrc1 $f26, 0(%[src10]) \r\n"
1061  "punpcklbh $f1, $f2, $f0 \r\n"
1062  "punpckhbh $f2, $f2, $f0 \r\n"
1063  "punpcklbh $f3, $f4, $f0 \r\n"
1064  "punpckhbh $f4, $f4, $f0 \r\n"
1065  "punpcklbh $f5, $f6, $f0 \r\n"
1066  "punpckhbh $f6, $f6, $f0 \r\n"
1067  "punpcklbh $f7, $f8, $f0 \r\n"
1068  "punpckhbh $f8, $f8, $f0 \r\n"
1069  "punpcklbh $f9, $f10, $f0 \r\n"
1070  "punpckhbh $f10, $f10, $f0 \r\n"
1071  "punpcklbh $f11, $f12, $f0 \r\n"
1072  "punpckhbh $f12, $f12, $f0 \r\n"
1073  "punpcklbh $f13, $f14, $f0 \r\n"
1074  "punpckhbh $f14, $f14, $f0 \r\n"
1075  "punpcklbh $f15, $f16, $f0 \r\n"
1076  "punpckhbh $f16, $f16, $f0 \r\n"
1077  "punpcklbh $f17, $f18, $f0 \r\n"
1078  "punpckhbh $f18, $f18, $f0 \r\n"
1079  "punpcklbh $f19, $f20, $f0 \r\n"
1080  "punpckhbh $f20, $f20, $f0 \r\n"
1081  "punpcklbh $f21, $f22, $f0 \r\n"
1082  "punpckhbh $f22, $f22, $f0 \r\n"
1083  "punpcklbh $f23, $f24, $f0 \r\n"
1084  "punpckhbh $f24, $f24, $f0 \r\n"
1085  "punpcklbh $f25, $f26, $f0 \r\n"
1086  "punpckhbh $f26, $f26, $f0 \r\n"
1087  "paddsh $f27, $f5, $f7 \r\n"
1088  "pmullh $f27, $f27, %[ff_pw_4] \r\n"
1089  "paddsh $f28, $f6, $f8 \r\n"//src0+src1
1090  "pmullh $f28, $f28, %[ff_pw_4] \r\n"
1091  "psubsh $f27, $f27, $f3 \r\n"
1092  "psubsh $f28, $f28, $f4 \r\n"
1093  "psubsh $f27, $f27, $f9 \r\n"
1094  "psubsh $f28, $f28, $f10 \r\n"
1095  "pmullh $f27, $f27, %[ff_pw_5] \r\n"
1096  "pmullh $f28, $f28, %[ff_pw_5] \r\n"
1097  "paddsh $f27, $f27, $f1 \r\n"
1098  "paddsh $f28, $f28, $f2 \r\n"
1099  "paddsh $f27, $f27, $f11 \r\n"
1100  "paddsh $f28, $f28, $f12 \r\n"
1101  "paddsh $f27, $f27, %[ff_pw_16] \r\n"
1102  "paddsh $f28, $f28, %[ff_pw_16] \r\n"
1103  "psrah $f27, $f27, %[ff_pw_5] \r\n"
1104  "psrah $f28, $f28, %[ff_pw_5] \r\n"
1105  "packushb $f27, $f27, $f0 \r\n"
1106  "packushb $f28, $f28, $f0 \r\n"
1107  "punpcklwd $f2, $f27, $f28 \r\n"
1108  "ldc1 $f28, 0(%[dst0]) \r\n"
1109  "pavgb $f2, $f2, $f28 \r\n"
1110  "sdc1 $f2, 0(%[dst0]) \r\n"
1111  "paddsh $f27, $f7, $f9 \r\n"
1112  "pmullh $f27, $f27, %[ff_pw_4] \r\n"
1113  "paddsh $f28, $f8, $f10 \r\n"//src1+src2
1114  "pmullh $f28, $f28, %[ff_pw_4] \r\n"
1115  "psubsh $f27, $f27, $f5 \r\n"
1116  "psubsh $f28, $f28, $f6 \r\n"
1117  "psubsh $f27, $f27, $f11 \r\n"
1118  "psubsh $f28, $f28, $f12 \r\n"
1119  "pmullh $f27, $f27, %[ff_pw_5] \r\n"
1120  "pmullh $f28, $f28, %[ff_pw_5] \r\n"
1121  "paddsh $f27, $f27, $f3 \r\n"
1122  "paddsh $f28, $f28, $f4 \r\n"
1123  "paddsh $f27, $f27, $f13 \r\n"
1124  "paddsh $f28, $f28, $f14 \r\n"
1125  "paddsh $f27, $f27, %[ff_pw_16] \r\n"
1126  "paddsh $f28, $f28, %[ff_pw_16] \r\n"
1127  "psrah $f27, $f27, %[ff_pw_5] \r\n"
1128  "psrah $f28, $f28, %[ff_pw_5] \r\n"
1129  "packushb $f27, $f27, $f0 \r\n"
1130  "packushb $f28, $f28, $f0 \r\n"
1131  "punpcklwd $f4, $f27, $f28 \r\n"
1132  "ldc1 $f28, 0(%[dst1]) \r\n"
1133  "pavgb $f4, $f4, $f28 \r\n"
1134  "sdc1 $f4, 0(%[dst1]) \r\n"
1135  "paddsh $f27, $f9, $f11 \r\n"
1136  "pmullh $f27, $f27, %[ff_pw_4] \r\n"
1137  "paddsh $f28, $f10, $f12 \r\n"//src2+src3
1138  "pmullh $f28, $f28, %[ff_pw_4] \r\n"
1139  "psubsh $f27, $f27, $f7 \r\n"
1140  "psubsh $f28, $f28, $f8 \r\n"
1141  "psubsh $f27, $f27, $f13 \r\n"
1142  "psubsh $f28, $f28, $f14 \r\n"
1143  "pmullh $f27, $f27, %[ff_pw_5] \r\n"
1144  "pmullh $f28, $f28, %[ff_pw_5] \r\n"
1145  "paddsh $f27, $f27, $f5 \r\n"
1146  "paddsh $f28, $f28, $f6 \r\n"
1147  "paddsh $f27, $f27, $f15 \r\n"
1148  "paddsh $f28, $f28, $f16 \r\n"
1149  "paddsh $f27, $f27, %[ff_pw_16] \r\n"
1150  "paddsh $f28, $f28, %[ff_pw_16] \r\n"
1151  "psrah $f27, $f27, %[ff_pw_5] \r\n"
1152  "psrah $f28, $f28, %[ff_pw_5] \r\n"
1153  "packushb $f27, $f27, $f0 \r\n"
1154  "packushb $f28, $f28, $f0 \r\n"
1155  "punpcklwd $f6, $f27, $f28 \r\n"
1156  "ldc1 $f28, 0(%[dst2]) \r\n"
1157  "pavgb $f6, $f6, $f28 \r\n"
1158  "sdc1 $f6, 0(%[dst2]) \r\n"
1159  "paddsh $f27, $f11, $f13 \r\n"
1160  "pmullh $f27, $f27, %[ff_pw_4] \r\n"
1161  "paddsh $f28, $f12, $f14 \r\n"//src3+src4
1162  "pmullh $f28, $f28, %[ff_pw_4] \r\n"
1163  "psubsh $f27, $f27, $f9 \r\n"
1164  "psubsh $f28, $f28, $f10 \r\n"
1165  "psubsh $f27, $f27, $f15 \r\n"
1166  "psubsh $f28, $f28, $f16 \r\n"
1167  "pmullh $f27, $f27, %[ff_pw_5] \r\n"
1168  "pmullh $f28, $f28, %[ff_pw_5] \r\n"
1169  "paddsh $f27, $f27, $f7 \r\n"
1170  "paddsh $f28, $f28, $f8 \r\n"
1171  "paddsh $f27, $f27, $f17 \r\n"
1172  "paddsh $f28, $f28, $f18 \r\n"
1173  "paddsh $f27, $f27, %[ff_pw_16] \r\n"
1174  "paddsh $f28, $f28, %[ff_pw_16] \r\n"
1175  "psrah $f27, $f27, %[ff_pw_5] \r\n"
1176  "psrah $f28, $f28, %[ff_pw_5] \r\n"
1177  "packushb $f27, $f27, $f0 \r\n"
1178  "packushb $f28, $f28, $f0 \r\n"
1179  "punpcklwd $f8, $f27, $f28 \r\n"
1180  "ldc1 $f28, 0(%[dst3]) \r\n"
1181  "pavgb $f8, $f8, $f28 \r\n"
1182  "sdc1 $f8, 0(%[dst3]) \r\n"
1183  "paddsh $f27, $f13, $f15 \r\n"
1184  "pmullh $f27, $f27, %[ff_pw_4] \r\n"
1185  "paddsh $f28, $f14, $f16 \r\n"//src4+src5
1186  "pmullh $f28, $f28, %[ff_pw_4] \r\n"
1187  "psubsh $f27, $f27, $f11 \r\n"
1188  "psubsh $f28, $f28, $f12 \r\n"
1189  "psubsh $f27, $f27, $f17 \r\n"
1190  "psubsh $f28, $f28, $f18 \r\n"
1191  "pmullh $f27, $f27, %[ff_pw_5] \r\n"
1192  "pmullh $f28, $f28, %[ff_pw_5] \r\n"
1193  "paddsh $f27, $f27, $f9 \r\n"
1194  "paddsh $f28, $f28, $f10 \r\n"
1195  "paddsh $f27, $f27, $f19 \r\n"
1196  "paddsh $f28, $f28, $f20 \r\n"
1197  "paddsh $f27, $f27, %[ff_pw_16] \r\n"
1198  "paddsh $f28, $f28, %[ff_pw_16] \r\n"
1199  "psrah $f27, $f27, %[ff_pw_5] \r\n"
1200  "psrah $f28, $f28, %[ff_pw_5] \r\n"
1201  "packushb $f27, $f27, $f0 \r\n"
1202  "packushb $f28, $f28, $f0 \r\n"
1203  "punpcklwd $f10, $f27, $f28 \r\n"
1204  "ldc1 $f28, 0(%[dst4]) \r\n"
1205  "pavgb $f10, $f10, $f28 \r\n"
1206  "sdc1 $f10, 0(%[dst4]) \r\n"
1207  "paddsh $f27, $f15, $f17 \r\n"
1208  "pmullh $f27, $f27, %[ff_pw_4] \r\n"
1209  "paddsh $f28, $f16, $f18 \r\n"//src5+src6
1210  "pmullh $f28, $f28, %[ff_pw_4] \r\n"
1211  "psubsh $f27, $f27, $f13 \r\n"
1212  "psubsh $f28, $f28, $f14 \r\n"
1213  "psubsh $f27, $f27, $f19 \r\n"
1214  "psubsh $f28, $f28, $f20 \r\n"
1215  "pmullh $f27, $f27, %[ff_pw_5] \r\n"
1216  "pmullh $f28, $f28, %[ff_pw_5] \r\n"
1217  "paddsh $f27, $f27, $f11 \r\n"
1218  "paddsh $f28, $f28, $f12 \r\n"
1219  "paddsh $f27, $f27, $f21 \r\n"
1220  "paddsh $f28, $f28, $f22 \r\n"
1221  "paddsh $f27, $f27, %[ff_pw_16] \r\n"
1222  "paddsh $f28, $f28, %[ff_pw_16] \r\n"
1223  "psrah $f27, $f27, %[ff_pw_5] \r\n"
1224  "psrah $f28, $f28, %[ff_pw_5] \r\n"
1225  "packushb $f27, $f27, $f0 \r\n"
1226  "packushb $f28, $f28, $f0 \r\n"
1227  "punpcklwd $f12, $f27, $f28 \r\n"
1228  "ldc1 $f28, 0(%[dst5]) \r\n"
1229  "pavgb $f12, $f12, $f28 \r\n"
1230  "sdc1 $f12, 0(%[dst5]) \r\n"
1231  "paddsh $f27, $f17, $f19 \r\n"
1232  "pmullh $f27, $f27, %[ff_pw_4] \r\n"
1233  "paddsh $f28, $f18, $f20 \r\n"//src6+src7
1234  "pmullh $f28, $f28, %[ff_pw_4] \r\n"
1235  "psubsh $f27, $f27, $f15 \r\n"
1236  "psubsh $f28, $f28, $f16 \r\n"
1237  "psubsh $f27, $f27, $f21 \r\n"
1238  "psubsh $f28, $f28, $f22 \r\n"
1239  "pmullh $f27, $f27, %[ff_pw_5] \r\n"
1240  "pmullh $f28, $f28, %[ff_pw_5] \r\n"
1241  "paddsh $f27, $f27, $f13 \r\n"
1242  "paddsh $f28, $f28, $f14 \r\n"
1243  "paddsh $f27, $f27, $f23 \r\n"
1244  "paddsh $f28, $f28, $f24 \r\n"
1245  "paddsh $f27, $f27, %[ff_pw_16] \r\n"
1246  "paddsh $f28, $f28, %[ff_pw_16] \r\n"
1247  "psrah $f27, $f27, %[ff_pw_5] \r\n"
1248  "psrah $f28, $f28, %[ff_pw_5] \r\n"
1249  "packushb $f27, $f27, $f0 \r\n"
1250  "packushb $f28, $f28, $f0 \r\n"
1251  "punpcklwd $f14, $f27, $f28 \r\n"
1252  "ldc1 $f28, 0(%[dst6]) \r\n"
1253  "pavgb $f14, $f14, $f28 \r\n"
1254  "sdc1 $f14, 0(%[dst6]) \r\n"
1255  "paddsh $f27, $f19, $f21 \r\n"
1256  "pmullh $f27, $f27, %[ff_pw_4] \r\n"
1257  "paddsh $f28, $f20, $f22 \r\n"//src7+src8
1258  "pmullh $f28, $f28, %[ff_pw_4] \r\n"
1259  "psubsh $f27, $f27, $f17 \r\n"
1260  "psubsh $f28, $f28, $f18 \r\n"
1261  "psubsh $f27, $f27, $f23 \r\n"
1262  "psubsh $f28, $f28, $f24 \r\n"
1263  "pmullh $f27, $f27, %[ff_pw_5] \r\n"
1264  "pmullh $f28, $f28, %[ff_pw_5] \r\n"
1265  "paddsh $f27, $f27, $f15 \r\n"
1266  "paddsh $f28, $f28, $f16 \r\n"
1267  "paddsh $f27, $f27, $f25 \r\n"
1268  "paddsh $f28, $f28, $f26 \r\n"
1269  "paddsh $f27, $f27, %[ff_pw_16] \r\n"
1270  "paddsh $f28, $f28, %[ff_pw_16] \r\n"
1271  "psrah $f27, $f27, %[ff_pw_5] \r\n"
1272  "psrah $f28, $f28, %[ff_pw_5] \r\n"
1273  "packushb $f27, $f27, $f0 \r\n"
1274  "packushb $f28, $f28, $f0 \r\n"
1275  "punpcklwd $f16, $f27, $f28 \r\n"
1276  "ldc1 $f28, 0(%[dst7]) \r\n"
1277  "pavgb $f16, $f16, $f28 \r\n"
1278  "sdc1 $f16, 0(%[dst7]) \r\n"
1279  ::[dst0]"r"(dst), [dst1]"r"(dst+dstStride),
1280  [dst2]"r"(dst+2*dstStride), [dst3]"r"(dst+3*dstStride),
1281  [dst4]"r"(dst+4*dstStride), [dst5]"r"(dst+5*dstStride),
1282  [dst6]"r"(dst+6*dstStride), [dst7]"r"(dst+7*dstStride),
1283  [srcB]"r"(src-2*srcStride), [srcA]"r"(src-srcStride),
1284  [src0]"r"(src), [src1]"r"(src+srcStride),
1285  [src2]"r"(src+2*srcStride), [src3]"r"(src+3*srcStride),
1286  [src4]"r"(src+4*srcStride), [src5]"r"(src+5*srcStride),
1287  [src6]"r"(src+6*srcStride), [src7]"r"(src+7*srcStride),
1288  [src8]"r"(src+8*srcStride), [src9]"r"(src+9*srcStride),
1289  [src10]"r"(src+10*srcStride), [ff_pw_4]"f"(ff_pw_4),
1290  [ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
1291  : "$f0","$f1","$f2","$f3","$f4","$f5","$f6","$f7","$f8","$f9","$f10",
1292  "$f11","$f12","$f13","$f14","$f15","$f16","$f17","$f18","$f19",
1293  "$f20","$f21","$f22","$f23","$f24","$f25","$f26","$f27","$f28"
1294  );
1295 }
1296 
1297 static void avg_h264_qpel16_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
1298  int dstStride, int srcStride)
1299 {
1300  avg_h264_qpel8_v_lowpass_mmi(dst, src, dstStride, srcStride);
1301  avg_h264_qpel8_v_lowpass_mmi(dst+8, src+8, dstStride, srcStride);
1302  src += 8*srcStride;
1303  dst += 8*dstStride;
1304  avg_h264_qpel8_v_lowpass_mmi(dst, src, dstStride, srcStride);
1305  avg_h264_qpel8_v_lowpass_mmi(dst+8, src+8, dstStride, srcStride);
1306 }
1307 
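/*
 * hv (centre) positions are done in two passes: the horizontal 6-tap filter
 * is run over N+5 rows (9 or 13) into a 16-bit tmp buffer in asm, then the
 * vertical 6-tap filter is applied to tmp in C, with op2_put/op2_avg doing
 * the final (val + 512) >> 10 rounding and clip.
 */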
1308 static void put_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
1309  int dstStride, int srcStride)
1310 {
1311  int i;
1312  int16_t _tmp[36];
1313  int16_t *tmp = _tmp;
1314  src -= 2*srcStride;
1315  __asm__ volatile (
1316  "xor $f0, $f0, $f0 \r\n"
1317  "dli $8, 9 \r\n"
1318  "1: \r\n"
1319  "gslwlc1 $f2, 1(%[src]) \r\n"
1320  "gslwrc1 $f2, -2(%[src]) \r\n"
1321  "gslwlc1 $f4, 2(%[src]) \r\n"
1322  "gslwrc1 $f4, -1(%[src]) \r\n"
1323  "gslwlc1 $f6, 3(%[src]) \r\n"
1324  "gslwrc1 $f6, 0(%[src]) \r\n"
1325  "gslwlc1 $f8, 4(%[src]) \r\n"
1326  "gslwrc1 $f8, 1(%[src]) \r\n"
1327  "gslwlc1 $f10, 5(%[src]) \r\n"
1328  "gslwrc1 $f10, 2(%[src]) \r\n"
1329  "gslwlc1 $f12, 6(%[src]) \r\n"
1330  "gslwrc1 $f12, 3(%[src]) \r\n"
1331  "punpcklbh $f2, $f2, $f0 \r\n"
1332  "punpcklbh $f4, $f4, $f0 \r\n"
1333  "punpcklbh $f6, $f6, $f0 \r\n"
1334  "punpcklbh $f8, $f8, $f0 \r\n"
1335  "punpcklbh $f10, $f10, $f0 \r\n"
1336  "punpcklbh $f12, $f12, $f0 \r\n"
1337  "paddsh $f14, $f6, $f8 \r\n"
1338  "paddsh $f16, $f4, $f10 \r\n"
1339  "paddsh $f18, $f2, $f12 \r\n"
1340  "pmullh $f14, $f14, %[ff_pw_20] \r\n"
1341  "pmullh $f16, $f16, %[ff_pw_5] \r\n"
1342  "psubsh $f14, $f14, $f16 \r\n"
1343  "paddsh $f18, $f14, $f18 \r\n"
1344  "sdc1 $f18, 0(%[tmp]) \r\n"
1345  "dadd %[tmp], %[tmp], %[tmpStride] \r\n"
1346  "dadd %[src], %[src], %[srcStride] \r\n"
1347  "daddi $8, $8, -1 \r\n"
1348  "bnez $8, 1b \r\n"
1349  : [tmp]"+&r"(tmp),[src]"+&r"(src)
1350  : [tmpStride]"r"(8),[srcStride]"r"(srcStride),
1351  [ff_pw_20]"f"(ff_pw_20),[ff_pw_5]"f"(ff_pw_5)
1352  : "$8","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16","$f18"
1353  );
1354 
1355  tmp -= 28;
1356 
1357  for(i=0; i<4; i++) {
1358  const int16_t tmpB= tmp[-8];
1359  const int16_t tmpA= tmp[-4];
1360  const int16_t tmp0= tmp[ 0];
1361  const int16_t tmp1= tmp[ 4];
1362  const int16_t tmp2= tmp[ 8];
1363  const int16_t tmp3= tmp[12];
1364  const int16_t tmp4= tmp[16];
1365  const int16_t tmp5= tmp[20];
1366  const int16_t tmp6= tmp[24];
1367  op2_put(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));
1368  op2_put(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));
1369  op2_put(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));
1370  op2_put(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));
1371  dst++;
1372  tmp++;
1373  }
1374 }
1375 
1376 static void put_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
1377  int dstStride, int srcStride)
1378 {
1379  int16_t _tmp[104];
1380  int16_t *tmp = _tmp;
1381  int i;
1382  src -= 2*srcStride;
1383 
1384  __asm__ volatile (
1385  "xor $f0, $f0, $f0 \r\n"
1386  "dli $8, 13 \r\n"
1387  "1: \r\n"
1388  "gsldlc1 $f2, 5(%[src]) \r\n"
1389  "gsldrc1 $f2, -2(%[src]) \r\n"
1390  "gsldlc1 $f4, 6(%[src]) \r\n"
1391  "gsldrc1 $f4, -1(%[src]) \r\n"
1392  "gsldlc1 $f6, 7(%[src]) \r\n"
1393  "gsldrc1 $f6, 0(%[src]) \r\n"
1394  "gsldlc1 $f8, 8(%[src]) \r\n"
1395  "gsldrc1 $f8, 1(%[src]) \r\n"
1396  "gsldlc1 $f10, 9(%[src]) \r\n"
1397  "gsldrc1 $f10, 2(%[src]) \r\n"
1398  "gsldlc1 $f12, 10(%[src]) \r\n"
1399  "gsldrc1 $f12, 3(%[src]) \r\n"
1400  "punpcklbh $f1, $f2, $f0 \r\n"
1401  "punpcklbh $f3, $f4, $f0 \r\n"
1402  "punpcklbh $f5, $f6, $f0 \r\n"
1403  "punpcklbh $f7, $f8, $f0 \r\n"
1404  "punpcklbh $f9, $f10, $f0 \r\n"
1405  "punpcklbh $f11, $f12, $f0 \r\n"
1406  "punpckhbh $f2, $f2, $f0 \r\n"
1407  "punpckhbh $f4, $f4, $f0 \r\n"
1408  "punpckhbh $f6, $f6, $f0 \r\n"
1409  "punpckhbh $f8, $f8, $f0 \r\n"
1410  "punpckhbh $f10, $f10, $f0 \r\n"
1411  "punpckhbh $f12, $f12, $f0 \r\n"
1412  "paddsh $f13, $f5, $f7 \r\n"
1413  "paddsh $f15, $f3, $f9 \r\n"
1414  "paddsh $f17, $f1, $f11 \r\n"
1415  "pmullh $f13, $f13, %[ff_pw_20] \r\n"
1416  "pmullh $f15, $f15, %[ff_pw_5] \r\n"
1417  "psubsh $f13, $f13, $f15 \r\n"
1418  "paddsh $f17, $f13, $f17 \r\n"
1419  "paddsh $f14, $f6, $f8 \r\n"
1420  "paddsh $f16, $f4, $f10 \r\n"
1421  "paddsh $f18, $f2, $f12 \r\n"
1422  "pmullh $f14, $f14, %[ff_pw_20] \r\n"
1423  "pmullh $f16, $f16, %[ff_pw_5] \r\n"
1424  "psubsh $f14, $f14, $f16 \r\n"
1425  "paddsh $f18, $f14, $f18 \r\n"
1426  "sdc1 $f17, 0(%[tmp]) \r\n"
1427  "sdc1 $f18, 8(%[tmp]) \r\n"
1428  "dadd %[tmp], %[tmp], %[tmpStride] \r\n"
1429  "dadd %[src], %[src], %[srcStride] \r\n"
1430  "daddi $8, $8, -1 \r\n"
1431  "bnez $8, 1b \r\n"
1432  : [tmp]"+&r"(tmp),[src]"+&r"(src)
1433  : [tmpStride]"r"(16),[srcStride]"r"(srcStride),
1434  [ff_pw_20]"f"(ff_pw_20),[ff_pw_5]"f"(ff_pw_5)
1435  : "$8","$f0","$f1","$f2","$f3","$f4","$f5","$f6","$f7","$f8","$f9",
1436  "$f10","$f11","$f12","$f13","$f14","$f15","$f16","$f17","$f18"
1437  );
1438 
1439  tmp -= 88;
1440 
1441  for(i=0; i<8; i++) {
1442  const int tmpB= tmp[-16];
1443  const int tmpA= tmp[ -8];
1444  const int tmp0= tmp[ 0];
1445  const int tmp1= tmp[ 8];
1446  const int tmp2= tmp[ 16];
1447  const int tmp3= tmp[ 24];
1448  const int tmp4= tmp[ 32];
1449  const int tmp5= tmp[ 40];
1450  const int tmp6= tmp[ 48];
1451  const int tmp7= tmp[ 56];
1452  const int tmp8= tmp[ 64];
1453  const int tmp9= tmp[ 72];
1454  const int tmp10=tmp[ 80];
1455  op2_put(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));
1456  op2_put(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));
1457  op2_put(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));
1458  op2_put(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));
1459  op2_put(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));
1460  op2_put(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));
1461  op2_put(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));
1462  op2_put(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));
1463  dst++;
1464  tmp++;
1465  }
1466 }
1467 
1468 static void put_h264_qpel16_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
1469  int dstStride, int srcStride)
1470 {
1471  put_h264_qpel8_hv_lowpass_mmi(dst, src, dstStride, srcStride);
1472  put_h264_qpel8_hv_lowpass_mmi(dst+8, src+8, dstStride, srcStride);
1473  src += 8*srcStride;
1474  dst += 8*dstStride;
1475  put_h264_qpel8_hv_lowpass_mmi(dst, src, dstStride, srcStride);
1476  put_h264_qpel8_hv_lowpass_mmi(dst+8, src+8, dstStride, srcStride);
1477 }
1478 
1479 static void avg_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
1480  int dstStride, int srcStride)
1481 {
1482  int i;
1483  int16_t _tmp[36];
1484  int16_t *tmp = _tmp;
1485  src -= 2*srcStride;
1486 
1487  __asm__ volatile (
1488  "xor $f0, $f0, $f0 \r\n"
1489  "dli $8, 9 \r\n"
1490  "1: \r\n"
1491  "gslwlc1 $f2, 1(%[src]) \r\n"
1492  "gslwrc1 $f2, -2(%[src]) \r\n"
1493  "gslwlc1 $f4, 2(%[src]) \r\n"
1494  "gslwrc1 $f4, -1(%[src]) \r\n"
1495  "gslwlc1 $f6, 3(%[src]) \r\n"
1496  "gslwrc1 $f6, 0(%[src]) \r\n"
1497  "gslwlc1 $f8, 4(%[src]) \r\n"
1498  "gslwrc1 $f8, 1(%[src]) \r\n"
1499  "gslwlc1 $f10, 5(%[src]) \r\n"
1500  "gslwrc1 $f10, 2(%[src]) \r\n"
1501  "gslwlc1 $f12, 6(%[src]) \r\n"
1502  "gslwrc1 $f12, 3(%[src]) \r\n"
1503  "punpcklbh $f2, $f2, $f0 \r\n"
1504  "punpcklbh $f4, $f4, $f0 \r\n"
1505  "punpcklbh $f6, $f6, $f0 \r\n"
1506  "punpcklbh $f8, $f8, $f0 \r\n"
1507  "punpcklbh $f10, $f10, $f0 \r\n"
1508  "punpcklbh $f12, $f12, $f0 \r\n"
1509  "paddsh $f14, $f6, $f8 \r\n"
1510  "paddsh $f16, $f4, $f10 \r\n"
1511  "paddsh $f18, $f2, $f12 \r\n"
1512  "pmullh $f14, $f14, %[ff_pw_20] \r\n"
1513  "pmullh $f16, $f16, %[ff_pw_5] \r\n"
1514  "psubsh $f14, $f14, $f16 \r\n"
1515  "paddsh $f18, $f14, $f18 \r\n"
1516  "sdc1 $f18, 0(%[tmp]) \r\n"
1517  "dadd %[tmp], %[tmp], %[tmpStride] \r\n"
1518  "dadd %[src], %[src], %[srcStride] \r\n"
1519  "daddi $8, $8, -1 \r\n"
1520  "bnez $8, 1b \r\n"
1521  : [tmp]"+&r"(tmp),[src]"+&r"(src)
1522  : [tmpStride]"r"(8),[srcStride]"r"(srcStride),
1523  [ff_pw_20]"f"(ff_pw_20),[ff_pw_5]"f"(ff_pw_5)
1524  : "$8","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16","$f18"
1525  );
1526 
1527  tmp -= 28;
1528 
1529  for(i=0; i<4; i++)
1530  {
1531  const int16_t tmpB= tmp[-8];
1532  const int16_t tmpA= tmp[-4];
1533  const int16_t tmp0= tmp[ 0];
1534  const int16_t tmp1= tmp[ 4];
1535  const int16_t tmp2= tmp[ 8];
1536  const int16_t tmp3= tmp[12];
1537  const int16_t tmp4= tmp[16];
1538  const int16_t tmp5= tmp[20];
1539  const int16_t tmp6= tmp[24];
1540  op2_avg(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));
1541  op2_avg(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));
1542  op2_avg(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));
1543  op2_avg(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));
1544  dst++;
1545  tmp++;
1546  }
1547 }
1548 
1549 static void avg_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
1550  int dstStride, int srcStride)
1551 {
1552  int16_t _tmp[104];
1553  int16_t *tmp = _tmp;
1554  int i;
1555  src -= 2*srcStride;
1556 
1557  __asm__ volatile (
1558  "xor $f0, $f0, $f0 \r\n"
1559  "dli $8, 13 \r\n"
1560  "1: \r\n"
1561  "gsldlc1 $f2, 5(%[src]) \r\n"
1562  "gsldrc1 $f2, -2(%[src]) \r\n"
1563  "gsldlc1 $f4, 6(%[src]) \r\n"
1564  "gsldrc1 $f4, -1(%[src]) \r\n"
1565  "gsldlc1 $f6, 7(%[src]) \r\n"
1566  "gsldrc1 $f6, 0(%[src]) \r\n"
1567  "gsldlc1 $f8, 8(%[src]) \r\n"
1568  "gsldrc1 $f8, 1(%[src]) \r\n"
1569  "gsldlc1 $f10, 9(%[src]) \r\n"
1570  "gsldrc1 $f10, 2(%[src]) \r\n"
1571  "gsldlc1 $f12, 10(%[src]) \r\n"
1572  "gsldrc1 $f12, 3(%[src]) \r\n"
1573  "punpcklbh $f1, $f2, $f0 \r\n"
1574  "punpcklbh $f3, $f4, $f0 \r\n"
1575  "punpcklbh $f5, $f6, $f0 \r\n"
1576  "punpcklbh $f7, $f8, $f0 \r\n"
1577  "punpcklbh $f9, $f10, $f0 \r\n"
1578  "punpcklbh $f11, $f12, $f0 \r\n"
1579  "punpckhbh $f2, $f2, $f0 \r\n"
1580  "punpckhbh $f4, $f4, $f0 \r\n"
1581  "punpckhbh $f6, $f6, $f0 \r\n"
1582  "punpckhbh $f8, $f8, $f0 \r\n"
1583  "punpckhbh $f10, $f10, $f0 \r\n"
1584  "punpckhbh $f12, $f12, $f0 \r\n"
1585  "paddsh $f13, $f5, $f7 \r\n"
1586  "paddsh $f15, $f3, $f9 \r\n"
1587  "paddsh $f17, $f1, $f11 \r\n"
1588  "pmullh $f13, $f13, %[ff_pw_20] \r\n"
1589  "pmullh $f15, $f15, %[ff_pw_5] \r\n"
1590  "psubsh $f13, $f13, $f15 \r\n"
1591  "paddsh $f17, $f13, $f17 \r\n"
1592  "paddsh $f14, $f6, $f8 \r\n"
1593  "paddsh $f16, $f4, $f10 \r\n"
1594  "paddsh $f18, $f2, $f12 \r\n"
1595  "pmullh $f14, $f14, %[ff_pw_20] \r\n"
1596  "pmullh $f16, $f16, %[ff_pw_5] \r\n"
1597  "psubsh $f14, $f14, $f16 \r\n"
1598  "paddsh $f18, $f14, $f18 \r\n"
1599 
1600  "sdc1 $f17, 0(%[tmp]) \r\n"
1601  "sdc1 $f18, 8(%[tmp]) \r\n"
1602  "dadd %[tmp], %[tmp], %[tmpStride] \r\n"
1603  "dadd %[src], %[src], %[srcStride] \r\n"
1604  "daddi $8, $8, -1 \r\n"
1605  "bnez $8, 1b \r\n"
1606  : [tmp]"+&r"(tmp),[src]"+&r"(src)
1607  : [tmpStride]"r"(16),[srcStride]"r"(srcStride),
1608  [ff_pw_20]"f"(ff_pw_20),[ff_pw_5]"f"(ff_pw_5)
1609  : "$8","$f0","$f1","$f2","$f3","$f4","$f5","$f6","$f7","$f8","$f9",
1610  "$f10","$f11","$f12","$f13","$f14","$f15","$f16","$f17","$f18"
1611  );
1612 
1613  tmp -= 88;
1614 
1615  for(i=0; i<8; i++) {
1616  const int tmpB= tmp[-16];
1617  const int tmpA= tmp[ -8];
1618  const int tmp0= tmp[ 0];
1619  const int tmp1= tmp[ 8];
1620  const int tmp2= tmp[ 16];
1621  const int tmp3= tmp[ 24];
1622  const int tmp4= tmp[ 32];
1623  const int tmp5= tmp[ 40];
1624  const int tmp6= tmp[ 48];
1625  const int tmp7= tmp[ 56];
1626  const int tmp8= tmp[ 64];
1627  const int tmp9= tmp[ 72];
1628  const int tmp10=tmp[ 80];
1629  op2_avg(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));
1630  op2_avg(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));
1631  op2_avg(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));
1632  op2_avg(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));
1633  op2_avg(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));
1634  op2_avg(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));
1635  op2_avg(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));
1636  op2_avg(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));
1637  dst++;
1638  tmp++;
1639  }
1640 }
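
A note on the intermediate precision used above: the first (horizontal) pass runs in 16-bit saturating MMI arithmetic, which is safe because its extreme values fit in int16_t, while the second (vertical) pass can exceed 16 bits and is therefore finished in plain C with int arithmetic. The small check below works those bounds out explicitly for 8-bit input and the taps 1, -5, 20, 20, -5, 1.

/* Worked bounds for the two-pass filter above. */
#include <assert.h>
#include <stdint.h>

int main(void)
{
    const int max1 = 255 * (20 + 20 + 1 + 1);   /*  10710: largest 1st-pass value  */
    const int min1 = -255 * (5 + 5);            /*  -2550: smallest 1st-pass value */
    assert(max1 <= INT16_MAX && min1 >= INT16_MIN); /* int16_t temp rows are safe  */

    const int max2 = max1 * (20 + 20 + 1 + 1) - min1 * (5 + 5); /* 475320 */
    assert(max2 > INT16_MAX);                   /* 2nd pass needs 32-bit accumulation */
    return 0;
}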
1641 
1642 static void avg_h264_qpel16_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
1643  int dstStride, int srcStride){
1644  avg_h264_qpel8_hv_lowpass_mmi(dst, src, dstStride, srcStride);
1645  avg_h264_qpel8_hv_lowpass_mmi(dst+8, src+8, dstStride, srcStride);
1646  src += 8*srcStride;
1647  dst += 8*dstStride;
1648  avg_h264_qpel8_hv_lowpass_mmi(dst, src, dstStride, srcStride);
1649  avg_h264_qpel8_hv_lowpass_mmi(dst+8, src+8, dstStride, srcStride);
1650 }
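
Both 16x16 hv wrappers (this one and the put_ variant earlier) simply apply the 8x8 kernel to the four 8x8 quadrants of the block. The sketch below (not from this file) writes that quadrant pattern once over a hypothetical function-pointer type.

#include <stdint.h>

typedef void (*qpel8_fn)(uint8_t *dst, const uint8_t *src,
                         int dstStride, int srcStride);

static void run_16x16_as_quadrants(qpel8_fn op, uint8_t *dst, const uint8_t *src,
                                   int dstStride, int srcStride)
{
    op(dst,                     src,                     dstStride, srcStride); /* top-left     */
    op(dst + 8,                 src + 8,                 dstStride, srcStride); /* top-right    */
    op(dst + 8 * dstStride,     src + 8 * srcStride,     dstStride, srcStride); /* bottom-left  */
    op(dst + 8 * dstStride + 8, src + 8 * srcStride + 8, dstStride, srcStride); /* bottom-right */
}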
1651 
1652 //DEF_H264_MC_MMI(put_, 4)
1653 void ff_put_h264_qpel4_mc00_mmi(uint8_t *dst, const uint8_t *src,
1654  ptrdiff_t stride)
1655 {
1656  put_pixels4_mmi(dst, src, stride, 4);
1657 }
1658 
1659 void ff_put_h264_qpel4_mc10_mmi(uint8_t *dst, const uint8_t *src,
1660  ptrdiff_t stride)
1661 {
1662  uint8_t half[16];
1663  put_h264_qpel4_h_lowpass_mmi(half, src, 4, stride);
1664  put_pixels4_l2_mmi(dst, src, half, stride, stride, 4, 4);
1665 }
1666 
1667 void ff_put_h264_qpel4_mc20_mmi(uint8_t *dst, const uint8_t *src,
1668  ptrdiff_t stride)
1669 {
1670  put_h264_qpel4_h_lowpass_mmi(dst, src, stride, stride);
1671 }
1672 
1673 void ff_put_h264_qpel4_mc30_mmi(uint8_t *dst, const uint8_t *src,
1674  ptrdiff_t stride)
1675 {
1676  uint8_t half[16];
1677  put_h264_qpel4_h_lowpass_mmi(half, src, 4, stride);
1678  put_pixels4_l2_mmi(dst, src+1, half, stride, stride, 4, 4);
1679 }
1680 
1681 void ff_put_h264_qpel4_mc01_mmi(uint8_t *dst, const uint8_t *src,
1682  ptrdiff_t stride)
1683 {
1684  uint8_t full[36];
1685  uint8_t * const full_mid= full + 8;
1686  uint8_t half[16];
1687  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
1688  put_h264_qpel4_v_lowpass_mmi(half, full_mid, 4, 4);
1689  put_pixels4_l2_mmi(dst, full_mid, half, stride, 4, 4, 4);
1690 }
1691 
1692 void ff_put_h264_qpel4_mc02_mmi(uint8_t *dst, const uint8_t *src,
1693  ptrdiff_t stride)
1694 {
1695  uint8_t full[36];
1696  uint8_t * const full_mid= full + 8;
1697  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
1698  put_h264_qpel4_v_lowpass_mmi(dst, full_mid, stride, 4);
1699 }
1700 
1701 void ff_put_h264_qpel4_mc03_mmi(uint8_t *dst, const uint8_t *src,
1702  ptrdiff_t stride)
1703 {
1704  uint8_t full[36];
1705  uint8_t * const full_mid= full + 8;
1706  uint8_t half[16];
1707  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
1708  put_h264_qpel4_v_lowpass_mmi(half, full_mid, 4, 4);
1709  put_pixels4_l2_mmi(dst, full_mid+4, half, stride, 4, 4, 4);
1710 }
1711 
1712 void ff_put_h264_qpel4_mc11_mmi(uint8_t *dst, const uint8_t *src,
1713  ptrdiff_t stride)
1714 {
1715  uint8_t full[36];
1716  uint8_t * const full_mid= full + 8;
1717  uint8_t halfH[16];
1718  uint8_t halfV[16];
1719  put_h264_qpel4_h_lowpass_mmi(halfH, src, 4, stride);
1720  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
1721  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
1722  put_pixels4_l2_mmi(dst, halfH, halfV, stride, 4, 4, 4);
1723 }
1724 
1725 void ff_put_h264_qpel4_mc31_mmi(uint8_t *dst, const uint8_t *src,
1726  ptrdiff_t stride)
1727 {
1728  uint8_t full[36];
1729  uint8_t * const full_mid= full + 8;
1730  uint8_t halfH[16];
1731  uint8_t halfV[16];
1732  put_h264_qpel4_h_lowpass_mmi(halfH, src, 4, stride);
1733  copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9);
1734  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
1735  put_pixels4_l2_mmi(dst, halfH, halfV, stride, 4, 4, 4);
1736 }
1737 
1738 void ff_put_h264_qpel4_mc13_mmi(uint8_t *dst, const uint8_t *src,
1739  ptrdiff_t stride)
1740 {
1741  uint8_t full[36];
1742  uint8_t * const full_mid= full + 8;
1743  uint8_t halfH[16];
1744  uint8_t halfV[16];
1745  put_h264_qpel4_h_lowpass_mmi(halfH, src + stride, 4, stride);
1746  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
1747  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
1748  put_pixels4_l2_mmi(dst, halfH, halfV, stride, 4, 4, 4);
1749 }
1750 
1751 void ff_put_h264_qpel4_mc33_mmi(uint8_t *dst, const uint8_t *src,
1752  ptrdiff_t stride)
1753 {
1754  uint8_t full[36];
1755  uint8_t * const full_mid= full + 8;
1756  uint8_t halfH[16];
1757  uint8_t halfV[16];
1758  put_h264_qpel4_h_lowpass_mmi(halfH, src + stride, 4, stride);
1759  copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9);
1760  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
1761  put_pixels4_l2_mmi(dst, halfH, halfV, stride, 4, 4, 4);
1762 }
1763 
1764 void ff_put_h264_qpel4_mc22_mmi(uint8_t *dst, const uint8_t *src,
1765  ptrdiff_t stride)
1766 {
1767  put_h264_qpel4_hv_lowpass_mmi(dst, src, stride, stride);
1768 }
1769 
1770 void ff_put_h264_qpel4_mc21_mmi(uint8_t *dst, const uint8_t *src,
1771  ptrdiff_t stride)
1772 {
1773  uint8_t halfH[16];
1774  uint8_t halfHV[16];
1775  put_h264_qpel4_h_lowpass_mmi(halfH, src, 4, stride);
1776  put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride);
1777  put_pixels4_l2_mmi(dst, halfH, halfHV, stride, 4, 4, 4);
1778 }
1779 
1780 void ff_put_h264_qpel4_mc23_mmi(uint8_t *dst, const uint8_t *src,
1781  ptrdiff_t stride)
1782 {
1783  uint8_t halfH[16];
1784  uint8_t halfHV[16];
1785  put_h264_qpel4_h_lowpass_mmi(halfH, src + stride, 4, stride);
1786  put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride);
1787  put_pixels4_l2_mmi(dst, halfH, halfHV, stride, 4, 4, 4);
1788 }
1789 
1790 void ff_put_h264_qpel4_mc12_mmi(uint8_t *dst, const uint8_t *src,
1791  ptrdiff_t stride)
1792 {
1793  uint8_t full[36];
1794  uint8_t * const full_mid= full + 8;
1795  uint8_t halfV[16];
1796  uint8_t halfHV[16];
1797  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
1798  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
1799  put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride);
1800  put_pixels4_l2_mmi(dst, halfV, halfHV, stride, 4, 4, 4);
1801 }
1802 
1803 void ff_put_h264_qpel4_mc32_mmi(uint8_t *dst, const uint8_t *src,
1804  ptrdiff_t stride)
1805 {
1806  uint8_t full[36];
1807  uint8_t * const full_mid= full + 8;
1808  uint8_t halfV[16];
1809  uint8_t halfHV[16];
1810  copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9);
1811  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
1812  put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride);
1813  put_pixels4_l2_mmi(dst, halfV, halfHV, stride, 4, 4, 4);
1814 }
1815 
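In the entry points above (and in the groups that follow), the mcXY suffix encodes the quarter-pel offset of the prediction: X is the horizontal offset and Y the vertical offset, each in quarter-pel units 0-3. mc20, mc02 and mc22 are the pure half-pel positions, and the remaining cases blend two of the put_* intermediates, as the bodies show. The sketch below is a hypothetical dispatch helper (not part of this file) that picks the right 4x4 put function from the quarter-pel components (dx, dy); it assumes the prototypes from "h264dsp_mips.h", which this file already includes.

#include <stddef.h>
#include <stdint.h>
#include "h264dsp_mips.h"   /* prototypes of the ff_put_h264_qpel4_mcXY_mmi functions */

typedef void (*h264_qpel_mc_func)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);

static const h264_qpel_mc_func put_qpel4_tab[4][4] = {
    { ff_put_h264_qpel4_mc00_mmi, ff_put_h264_qpel4_mc10_mmi,
      ff_put_h264_qpel4_mc20_mmi, ff_put_h264_qpel4_mc30_mmi },
    { ff_put_h264_qpel4_mc01_mmi, ff_put_h264_qpel4_mc11_mmi,
      ff_put_h264_qpel4_mc21_mmi, ff_put_h264_qpel4_mc31_mmi },
    { ff_put_h264_qpel4_mc02_mmi, ff_put_h264_qpel4_mc12_mmi,
      ff_put_h264_qpel4_mc22_mmi, ff_put_h264_qpel4_mc32_mmi },
    { ff_put_h264_qpel4_mc03_mmi, ff_put_h264_qpel4_mc13_mmi,
      ff_put_h264_qpel4_mc23_mmi, ff_put_h264_qpel4_mc33_mmi },
};

static void put_qpel4(uint8_t *dst, const uint8_t *src, ptrdiff_t stride,
                      int dx, int dy)
{
    put_qpel4_tab[dy][dx](dst, src, stride);   /* dx, dy in 0..3 */
}
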
1816 //DEF_H264_MC_MMI(avg_, 4)
1817 void ff_avg_h264_qpel4_mc00_mmi(uint8_t *dst, const uint8_t *src,
1818  ptrdiff_t stride)
1819 {
1820  avg_pixels4_mmi(dst, src, stride, 4);
1821 }
1822 
1823 void ff_avg_h264_qpel4_mc10_mmi(uint8_t *dst, const uint8_t *src,
1824  ptrdiff_t stride)
1825 {
1826  uint8_t half[16];
1827  put_h264_qpel4_h_lowpass_mmi(half, src, 4, stride);
1828  avg_pixels4_l2_mmi(dst, src, half, stride, stride, 4, 4);
1829 }
1830 
1831 void ff_avg_h264_qpel4_mc20_mmi(uint8_t *dst, const uint8_t *src,
1832  ptrdiff_t stride)
1833 {
1834  avg_h264_qpel4_h_lowpass_mmi(dst, src, stride, stride);
1835 }
1836 
1837 void ff_avg_h264_qpel4_mc30_mmi(uint8_t *dst, const uint8_t *src,
1838  ptrdiff_t stride)
1839 {
1840  uint8_t half[16];
1841  put_h264_qpel4_h_lowpass_mmi(half, src, 4, stride);
1842  avg_pixels4_l2_mmi(dst, src+1, half, stride, stride, 4, 4);
1843 }
1844 
1845 void ff_avg_h264_qpel4_mc01_mmi(uint8_t *dst, const uint8_t *src,
1846  ptrdiff_t stride)
1847 {
1848  uint8_t full[36];
1849  uint8_t * const full_mid= full + 8;
1850  uint8_t half[16];
1851  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
1852  put_h264_qpel4_v_lowpass_mmi(half, full_mid, 4, 4);
1853  avg_pixels4_l2_mmi(dst, full_mid, half, stride, 4, 4, 4);
1854 }
1855 
1856 void ff_avg_h264_qpel4_mc02_mmi(uint8_t *dst, const uint8_t *src,
1857  ptrdiff_t stride)
1858 {
1859  uint8_t full[36];
1860  uint8_t * const full_mid= full + 8;
1861  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
1862  avg_h264_qpel4_v_lowpass_mmi(dst, full_mid, stride, 4);
1863 }
1864 
1865 void ff_avg_h264_qpel4_mc03_mmi(uint8_t *dst, const uint8_t *src,
1866  ptrdiff_t stride)
1867 {
1868  uint8_t full[36];
1869  uint8_t * const full_mid= full + 8;
1870  uint8_t half[16];
1871  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
1872  put_h264_qpel4_v_lowpass_mmi(half, full_mid, 4, 4);
1873  avg_pixels4_l2_mmi(dst, full_mid+4, half, stride, 4, 4, 4);
1874 }
1875 
1876 void ff_avg_h264_qpel4_mc11_mmi(uint8_t *dst, const uint8_t *src,
1877  ptrdiff_t stride)
1878 {
1879  uint8_t full[36];
1880  uint8_t * const full_mid= full + 8;
1881  uint8_t halfH[16];
1882  uint8_t halfV[16];
1883  put_h264_qpel4_h_lowpass_mmi(halfH, src, 4, stride);
1884  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
1885  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
1886  avg_pixels4_l2_mmi(dst, halfH, halfV, stride, 4, 4, 4);
1887 }
1888 
1889 void ff_avg_h264_qpel4_mc31_mmi(uint8_t *dst, const uint8_t *src,
1890  ptrdiff_t stride)
1891 {
1892  uint8_t full[36];
1893  uint8_t * const full_mid= full + 8;
1894  uint8_t halfH[16];
1895  uint8_t halfV[16];
1896  put_h264_qpel4_h_lowpass_mmi(halfH, src, 4, stride);
1897  copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9);
1898  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
1899  avg_pixels4_l2_mmi(dst, halfH, halfV, stride, 4, 4, 4);
1900 }
1901 
1902 void ff_avg_h264_qpel4_mc13_mmi(uint8_t *dst, const uint8_t *src,
1903  ptrdiff_t stride)
1904 {
1905  uint8_t full[36];
1906  uint8_t * const full_mid= full + 8;
1907  uint8_t halfH[16];
1908  uint8_t halfV[16];
1909  put_h264_qpel4_h_lowpass_mmi(halfH, src + stride, 4, stride);
1910  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
1911  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
1912  avg_pixels4_l2_mmi(dst, halfH, halfV, stride, 4, 4, 4);
1913 }
1914 
1915 void ff_avg_h264_qpel4_mc33_mmi(uint8_t *dst, const uint8_t *src,
1916  ptrdiff_t stride)
1917 {
1918  uint8_t full[36];
1919  uint8_t * const full_mid= full + 8;
1920  uint8_t halfH[16];
1921  uint8_t halfV[16];
1922  put_h264_qpel4_h_lowpass_mmi(halfH, src + stride, 4, stride);
1923  copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9);
1924  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
1925  avg_pixels4_l2_mmi(dst, halfH, halfV, stride, 4, 4, 4);
1926 }
1927 
1928 void ff_avg_h264_qpel4_mc22_mmi(uint8_t *dst, const uint8_t *src,
1929  ptrdiff_t stride)
1930 {
1931  avg_h264_qpel4_hv_lowpass_mmi(dst, src, stride, stride);
1932 }
1933 
1934 void ff_avg_h264_qpel4_mc21_mmi(uint8_t *dst, const uint8_t *src,
1935  ptrdiff_t stride)
1936 {
1937  uint8_t halfH[16];
1938  uint8_t halfHV[16];
1939  put_h264_qpel4_h_lowpass_mmi(halfH, src, 4, stride);
1940  put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride);
1941  avg_pixels4_l2_mmi(dst, halfH, halfHV, stride, 4, 4, 4);
1942 }
1943 
1944 void ff_avg_h264_qpel4_mc23_mmi(uint8_t *dst, const uint8_t *src,
1945  ptrdiff_t stride)
1946 {
1947  uint8_t halfH[16];
1948  uint8_t halfHV[16];
1949  put_h264_qpel4_h_lowpass_mmi(halfH, src + stride, 4, stride);
1950  put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride);
1951  avg_pixels4_l2_mmi(dst, halfH, halfHV, stride, 4, 4, 4);
1952 }
1953 
1954 void ff_avg_h264_qpel4_mc12_mmi(uint8_t *dst, const uint8_t *src,
1955  ptrdiff_t stride)
1956 {
1957  uint8_t full[36];
1958  uint8_t * const full_mid= full + 8;
1959  uint8_t halfV[16];
1960  uint8_t halfHV[16];
1961  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
1962  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
1963  put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride);
1964  avg_pixels4_l2_mmi(dst, halfV, halfHV, stride, 4, 4, 4);
1965 }
1966 
1967 void ff_avg_h264_qpel4_mc32_mmi(uint8_t *dst, const uint8_t *src,
1968  ptrdiff_t stride)
1969 {
1970  uint8_t full[36];
1971  uint8_t * const full_mid= full + 8;
1972  uint8_t halfV[16];
1973  uint8_t halfHV[16];
1974  copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9);
1975  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
1976  put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride);
1977  avg_pixels4_l2_mmi(dst, halfV, halfHV, stride, 4, 4, 4);
1978 }
1979 
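The quarter-pel positions above are produced by averaging two precomputed planes with the *_pixels*_l2_mmi helpers. Those helpers are assumed here to compute the usual H.264 upward-rounding average, (a + b + 1) >> 1, per pixel; a scalar sketch (not part of this file):

#include <stdint.h>

/* Average two source planes with upward rounding into dst. */
static void put_pixels_l2_ref(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
                              int dst_stride, int src_stride1, int src_stride2,
                              int w, int h)
{
    for (int y = 0; y < h; y++) {
        for (int x = 0; x < w; x++)
            dst[x] = (src1[x] + src2[x] + 1) >> 1;
        dst  += dst_stride;
        src1 += src_stride1;
        src2 += src_stride2;
    }
}
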
1980 //DEF_H264_MC_MMI(put_, 8)
1981 void ff_put_h264_qpel8_mc00_mmi(uint8_t *dst, const uint8_t *src,
1982  ptrdiff_t stride)
1983 {
1984  put_pixels8_mmi(dst, src, stride, 8);
1985 }
1986 
1987 void ff_put_h264_qpel8_mc10_mmi(uint8_t *dst, const uint8_t *src,
1988  ptrdiff_t stride)
1989 {
1990  uint8_t half[64];
1991  put_h264_qpel8_h_lowpass_mmi(half, src, 8, stride);
1992  put_pixels8_l2_mmi(dst, src, half, stride, stride, 8, 8);
1993 }
1994 
1995 void ff_put_h264_qpel8_mc20_mmi(uint8_t *dst, const uint8_t *src,
1996  ptrdiff_t stride)
1997 {
1998  put_h264_qpel8_h_lowpass_mmi(dst, src, stride, stride);
1999 }
2000 
2001 void ff_put_h264_qpel8_mc30_mmi(uint8_t *dst, const uint8_t *src,
2002  ptrdiff_t stride)
2003 {
2004  uint8_t half[64];
2005  put_h264_qpel8_h_lowpass_mmi(half, src, 8, stride);
2006  put_pixels8_l2_mmi(dst, src+1, half, stride, stride, 8, 8);
2007 }
2008 
2009 void ff_put_h264_qpel8_mc01_mmi(uint8_t *dst, const uint8_t *src,
2010  ptrdiff_t stride)
2011 {
2012  uint8_t full[104];
2013  uint8_t * const full_mid= full + 16;
2014  uint8_t half[64];
2015  copy_block8_mmi(full, src - stride*2, 8, stride, 13);
2016  put_h264_qpel8_v_lowpass_mmi(half, full_mid, 8, 8);
2017  put_pixels8_l2_mmi(dst, full_mid, half, stride, 8, 8, 8);
2018 }
2019 
2020 void ff_put_h264_qpel8_mc02_mmi(uint8_t *dst, const uint8_t *src,
2021  ptrdiff_t stride)
2022 {
2023  uint8_t full[104];
2024  uint8_t * const full_mid= full + 16;
2025  copy_block8_mmi(full, src - stride*2, 8, stride, 13);
2026  put_h264_qpel8_v_lowpass_mmi(dst, full_mid, stride, 8);
2027 }
2028 
2029 void ff_put_h264_qpel8_mc03_mmi(uint8_t *dst, const uint8_t *src,
2030  ptrdiff_t stride)
2031 {
2032  uint8_t full[104];
2033  uint8_t * const full_mid= full + 16;
2034  uint8_t half[64];
2035  copy_block8_mmi(full, src - stride*2, 8, stride, 13);
2036  put_h264_qpel8_v_lowpass_mmi(half, full_mid, 8, 8);
2037  put_pixels8_l2_mmi(dst, full_mid+8, half, stride, 8, 8, 8);
2038 }
2039 
2040 void ff_put_h264_qpel8_mc11_mmi(uint8_t *dst, const uint8_t *src,
2041  ptrdiff_t stride)
2042 {
2043  uint8_t full[104];
2044  uint8_t * const full_mid= full + 16;
2045  uint8_t halfH[64];
2046  uint8_t halfV[64];
2047  put_h264_qpel8_h_lowpass_mmi(halfH, src, 8, stride);
2048  copy_block8_mmi(full, src - stride*2, 8, stride, 13);
2049  put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
2050  put_pixels8_l2_mmi(dst, halfH, halfV, stride, 8, 8, 8);
2051 }
2052 
2053 void ff_put_h264_qpel8_mc31_mmi(uint8_t *dst, const uint8_t *src,
2054  ptrdiff_t stride)
2055 {
2056  uint8_t full[104];
2057  uint8_t * const full_mid= full + 16;
2058  uint8_t halfH[64];
2059  uint8_t halfV[64];
2060  put_h264_qpel8_h_lowpass_mmi(halfH, src, 8, stride);
2061  copy_block8_mmi(full, src - stride*2 + 1, 8, stride, 13);
2062  put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
2063  put_pixels8_l2_mmi(dst, halfH, halfV, stride, 8, 8, 8);
2064 }
2065 
2066 void ff_put_h264_qpel8_mc13_mmi(uint8_t *dst, const uint8_t *src,
2067  ptrdiff_t stride)
2068 {
2069  uint8_t full[104];
2070  uint8_t * const full_mid= full + 16;
2071  uint8_t halfH[64];
2072  uint8_t halfV[64];
2073  put_h264_qpel8_h_lowpass_mmi(halfH, src + stride, 8, stride);
2074  copy_block8_mmi(full, src - stride*2, 8, stride, 13);
2075  put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
2076  put_pixels8_l2_mmi(dst, halfH, halfV, stride, 8, 8, 8);
2077 }
2078 
2079 void ff_put_h264_qpel8_mc33_mmi(uint8_t *dst, const uint8_t *src,
2080  ptrdiff_t stride)
2081 {
2082  uint8_t full[104];
2083  uint8_t * const full_mid= full + 16;
2084  uint8_t halfH[64];
2085  uint8_t halfV[64];
2086  put_h264_qpel8_h_lowpass_mmi(halfH, src + stride, 8, stride);
2087  copy_block8_mmi(full, src - stride*2 + 1, 8, stride, 13);
2088  put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
2089  put_pixels8_l2_mmi(dst, halfH, halfV, stride, 8, 8, 8);
2090 }
2091 
2092 void ff_put_h264_qpel8_mc22_mmi(uint8_t *dst, const uint8_t *src,
2093  ptrdiff_t stride)
2094 {
2095  put_h264_qpel8_hv_lowpass_mmi(dst, src, stride, stride);
2096 }
2097 
2098 void ff_put_h264_qpel8_mc21_mmi(uint8_t *dst, const uint8_t *src,
2099  ptrdiff_t stride)
2100 {
2101  uint8_t halfH[64];
2102  uint8_t halfHV[64];
2103  put_h264_qpel8_h_lowpass_mmi(halfH, src, 8, stride);
2104  put_h264_qpel8_hv_lowpass_mmi(halfHV, src, 8, stride);
2105  put_pixels8_l2_mmi(dst, halfH, halfHV, stride, 8, 8, 8);
2106 }
2107 
2108 void ff_put_h264_qpel8_mc23_mmi(uint8_t *dst, const uint8_t *src,
2109  ptrdiff_t stride)
2110 {
2111  uint8_t halfH[64];
2112  uint8_t halfHV[64];
2113  put_h264_qpel8_h_lowpass_mmi(halfH, src + stride, 8, stride);
2114  put_h264_qpel8_hv_lowpass_mmi(halfHV, src, 8, stride);
2115  put_pixels8_l2_mmi(dst, halfH, halfHV, stride, 8, 8, 8);
2116 }
2117 
2118 void ff_put_h264_qpel8_mc12_mmi(uint8_t *dst, const uint8_t *src,
2119  ptrdiff_t stride)
2120 {
2121  uint8_t full[104];
2122  uint8_t * const full_mid= full + 16;
2123  uint8_t halfV[64];
2124  uint8_t halfHV[64];
2125  copy_block8_mmi(full, src - stride*2, 8, stride, 13);
2126  put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
2127  put_h264_qpel8_hv_lowpass_mmi(halfHV, src, 8, stride);
2128  put_pixels8_l2_mmi(dst, halfV, halfHV, stride, 8, 8, 8);
2129 }
2130 
2131 void ff_put_h264_qpel8_mc32_mmi(uint8_t *dst, const uint8_t *src,
2132  ptrdiff_t stride)
2133 {
2134  uint8_t full[104];
2135  uint8_t * const full_mid= full + 16;
2136  uint8_t halfV[64];
2137  uint8_t halfHV[64];
2138  copy_block8_mmi(full, src - stride*2 + 1, 8, stride, 13);
2139  put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
2140  put_h264_qpel8_hv_lowpass_mmi(halfHV, src, 8, stride);
2141  put_pixels8_l2_mmi(dst, halfV, halfHV, stride, 8, 8, 8);
2142 }
2143 
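In the 8-wide vertical and mixed paths above, copy_block8_mmi copies 13 source rows starting two rows above the block (8 output rows plus the 2-above/3-below margin needed by the 6-tap filter), and full_mid = full + 16 skips the two margin rows of 8 bytes each. The tiny check below only restates that buffer geometry:

#include <assert.h>

int main(void)
{
    const int rows_copied = 13, margin_top = 2, margin_bottom = 3, out_rows = 8;
    assert(rows_copied == margin_top + out_rows + margin_bottom);
    assert(16 == margin_top * 8);     /* full_mid = full + 16          */
    assert(104 == rows_copied * 8);   /* full[104] holds all 13 rows   */
    return 0;
}
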
2144 //DEF_H264_MC_MMI(avg_, 8)
2145 void ff_avg_h264_qpel8_mc00_mmi(uint8_t *dst, const uint8_t *src,
2146  ptrdiff_t stride)
2147 {
2148  avg_pixels8_mmi(dst, src, stride, 8);
2149 }
2150 
2151 void ff_avg_h264_qpel8_mc10_mmi(uint8_t *dst, const uint8_t *src,
2152  ptrdiff_t stride)
2153 {
2154  uint8_t half[64];
2155  put_h264_qpel8_h_lowpass_mmi(half, src, 8, stride);
2156  avg_pixels8_l2_mmi(dst, src, half, stride, stride, 8, 8);
2157 }
2158 
2159 void ff_avg_h264_qpel8_mc20_mmi(uint8_t *dst, const uint8_t *src,
2160  ptrdiff_t stride)
2161 {
2162  avg_h264_qpel8_h_lowpass_mmi(dst, src, stride, stride);
2163 }
2164 
2165 void ff_avg_h264_qpel8_mc30_mmi(uint8_t *dst, const uint8_t *src,
2166  ptrdiff_t stride)
2167 {
2168  uint8_t half[64];
2169  put_h264_qpel8_h_lowpass_mmi(half, src, 8, stride);
2170  avg_pixels8_l2_mmi(dst, src+1, half, stride, stride, 8, 8);
2171 }
2172 
2173 void ff_avg_h264_qpel8_mc01_mmi(uint8_t *dst, const uint8_t *src,
2174  ptrdiff_t stride)
2175 {
2176  uint8_t full[104];
2177  uint8_t * const full_mid= full + 16;
2178  uint8_t half[64];
2179  copy_block8_mmi(full, src - stride*2, 8, stride, 13);
2180  put_h264_qpel8_v_lowpass_mmi(half, full_mid, 8, 8);
2181  avg_pixels8_l2_mmi(dst, full_mid, half, stride, 8, 8, 8);
2182 }
2183 
2184 void ff_avg_h264_qpel8_mc02_mmi(uint8_t *dst, const uint8_t *src,
2185  ptrdiff_t stride)
2186 {
2187  uint8_t full[104];
2188  uint8_t * const full_mid= full + 16;
2189  copy_block8_mmi(full, src - stride*2, 8, stride, 13);
2190  avg_h264_qpel8_v_lowpass_mmi(dst, full_mid, stride, 8);
2191 }
2192 
2193 void ff_avg_h264_qpel8_mc03_mmi(uint8_t *dst, const uint8_t *src,
2194  ptrdiff_t stride)
2195 {
2196  uint8_t full[104];
2197  uint8_t * const full_mid= full + 16;
2198  uint8_t half[64];
2199  copy_block8_mmi(full, src - stride*2, 8, stride, 13);
2200  put_h264_qpel8_v_lowpass_mmi(half, full_mid, 8, 8);
2201  avg_pixels8_l2_mmi(dst, full_mid+8, half, stride, 8, 8, 8);
2202 }
2203 
2204 void ff_avg_h264_qpel8_mc11_mmi(uint8_t *dst, const uint8_t *src,
2205  ptrdiff_t stride)
2206 {
2207  uint8_t full[104];
2208  uint8_t * const full_mid= full + 16;
2209  uint8_t halfH[64];
2210  uint8_t halfV[64];
2211  put_h264_qpel8_h_lowpass_mmi(halfH, src, 8, stride);
2212  copy_block8_mmi(full, src - stride*2, 8, stride, 13);
2213  put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
2214  avg_pixels8_l2_mmi(dst, halfH, halfV, stride, 8, 8, 8);
2215 }
2216 
2217 void ff_avg_h264_qpel8_mc31_mmi(uint8_t *dst, const uint8_t *src,
2218  ptrdiff_t stride)
2219 {
2220  uint8_t full[104];
2221  uint8_t * const full_mid= full + 16;
2222  uint8_t halfH[64];
2223  uint8_t halfV[64];
2224  put_h264_qpel8_h_lowpass_mmi(halfH, src, 8, stride);
2225  copy_block8_mmi(full, src - stride*2 + 1, 8, stride, 13);
2226  put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
2227  avg_pixels8_l2_mmi(dst, halfH, halfV, stride, 8, 8, 8);
2228 }
2229 
2230 void ff_avg_h264_qpel8_mc13_mmi(uint8_t *dst, const uint8_t *src,
2231  ptrdiff_t stride)
2232 {
2233  uint8_t full[104];
2234  uint8_t * const full_mid= full + 16;
2235  uint8_t halfH[64];
2236  uint8_t halfV[64];
2237  put_h264_qpel8_h_lowpass_mmi(halfH, src + stride, 8, stride);
2238  copy_block8_mmi(full, src - stride*2, 8, stride, 13);
2239  put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
2240  avg_pixels8_l2_mmi(dst, halfH, halfV, stride, 8, 8, 8);
2241 }
2242 
2243 void ff_avg_h264_qpel8_mc33_mmi(uint8_t *dst, const uint8_t *src,
2244  ptrdiff_t stride)
2245 {
2246  uint8_t full[104];
2247  uint8_t * const full_mid= full + 16;
2248  uint8_t halfH[64];
2249  uint8_t halfV[64];
2250  put_h264_qpel8_h_lowpass_mmi(halfH, src + stride, 8, stride);
2251  copy_block8_mmi(full, src - stride*2 + 1, 8, stride, 13);
2252  put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
2253  avg_pixels8_l2_mmi(dst, halfH, halfV, stride, 8, 8, 8);
2254 }
2255 
2256 void ff_avg_h264_qpel8_mc22_mmi(uint8_t *dst, const uint8_t *src,
2257  ptrdiff_t stride)
2258 {
2259  avg_h264_qpel8_hv_lowpass_mmi(dst, src, stride, stride);
2260 }
2261 
2262 void ff_avg_h264_qpel8_mc21_mmi(uint8_t *dst, const uint8_t *src,
2263  ptrdiff_t stride)
2264 {
2265  uint8_t halfH[64];
2266  uint8_t halfHV[64];
2267  put_h264_qpel8_h_lowpass_mmi(halfH, src, 8, stride);
2268  put_h264_qpel8_hv_lowpass_mmi(halfHV, src, 8, stride);
2269  avg_pixels8_l2_mmi(dst, halfH, halfHV, stride, 8, 8, 8);
2270 }
2271 
2272 void ff_avg_h264_qpel8_mc23_mmi(uint8_t *dst, const uint8_t *src,
2273  ptrdiff_t stride)
2274 {
2275  uint8_t halfH[64];
2276  uint8_t halfHV[64];
2277  put_h264_qpel8_h_lowpass_mmi(halfH, src + stride, 8, stride);
2278  put_h264_qpel8_hv_lowpass_mmi(halfHV, src, 8, stride);
2279  avg_pixels8_l2_mmi(dst, halfH, halfHV, stride, 8, 8, 8);
2280 }
2281 
2282 void ff_avg_h264_qpel8_mc12_mmi(uint8_t *dst, const uint8_t *src,
2283  ptrdiff_t stride)
2284 {
2285  uint8_t full[104];
2286  uint8_t * const full_mid= full + 16;
2287  uint8_t halfV[64];
2288  uint8_t halfHV[64];
2289  copy_block8_mmi(full, src - stride*2, 8, stride, 13);
2290  put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
2291  put_h264_qpel8_hv_lowpass_mmi(halfHV, src, 8, stride);
2292  avg_pixels8_l2_mmi(dst, halfV, halfHV, stride, 8, 8, 8);
2293 }
2294 
2295 void ff_avg_h264_qpel8_mc32_mmi(uint8_t *dst, const uint8_t *src,
2296  ptrdiff_t stride)
2297 {
2298  uint8_t full[104];
2299  uint8_t * const full_mid= full + 16;
2300  uint8_t halfV[64];
2301  uint8_t halfHV[64];
2302  copy_block8_mmi(full, src - stride*2 + 1, 8, stride, 13);
2303  put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
2304  put_h264_qpel8_hv_lowpass_mmi(halfHV, src, 8, stride);
2305  avg_pixels8_l2_mmi(dst, halfV, halfHV, stride, 8, 8, 8);
2306 }
2307 
2308 //DEF_H264_MC_MMI(put_, 16)
2309 void ff_put_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src,
2310  ptrdiff_t stride)
2311 {
2312  put_pixels16_mmi(dst, src, stride, 16);
2313 }
2314 
2315 void ff_put_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src,
2316  ptrdiff_t stride)
2317 {
2318  uint8_t half[256];
2319  put_h264_qpel16_h_lowpass_mmi(half, src, 16, stride);
2320  put_pixels16_l2_mmi(dst, src, half, stride, stride, 16, 16);
2321 }
2322 
2323 void ff_put_h264_qpel16_mc20_mmi(uint8_t *dst, const uint8_t *src,
2324  ptrdiff_t stride)
2325 {
2326  put_h264_qpel16_h_lowpass_mmi(dst, src, stride, stride);
2327 }
2328 
2329 void ff_put_h264_qpel16_mc30_mmi(uint8_t *dst, const uint8_t *src,
2330  ptrdiff_t stride)
2331 {
2332  uint8_t half[256];
2333  put_h264_qpel16_h_lowpass_mmi(half, src, 16, stride);
2334  put_pixels16_l2_mmi(dst, src+1, half, stride, stride, 16, 16);
2335 }
2336 
2337 void ff_put_h264_qpel16_mc01_mmi(uint8_t *dst, const uint8_t *src,
2338  ptrdiff_t stride)
2339 {
2340  uint8_t full[336];
2341  uint8_t * const full_mid= full + 32;
2342  uint8_t half[256];
2343  copy_block16_mmi(full, src - stride*2, 16, stride, 21);
2344  put_h264_qpel16_v_lowpass_mmi(half, full_mid, 16, 16);
2345  put_pixels16_l2_mmi(dst, full_mid, half, stride, 16, 16, 16);
2346 }
2347 
2348 void ff_put_h264_qpel16_mc02_mmi(uint8_t *dst, const uint8_t *src,
2349  ptrdiff_t stride)
2350 {
2351  uint8_t full[336];
2352  uint8_t * const full_mid= full + 32;
2353  copy_block16_mmi(full, src - stride*2, 16, stride, 21);
2354  put_h264_qpel16_v_lowpass_mmi(dst, full_mid, stride, 16);
2355 }
2356 
2357 void ff_put_h264_qpel16_mc03_mmi(uint8_t *dst, const uint8_t *src,
2358  ptrdiff_t stride)
2359 {
2360  uint8_t full[336];
2361  uint8_t * const full_mid= full + 32;
2362  uint8_t half[256];
2363  copy_block16_mmi(full, src - stride*2, 16, stride, 21);
2364  put_h264_qpel16_v_lowpass_mmi(half, full_mid, 16, 16);
2365  put_pixels16_l2_mmi(dst, full_mid+16, half, stride, 16, 16, 16);
2366 }
2367 
2368 void ff_put_h264_qpel16_mc11_mmi(uint8_t *dst, const uint8_t *src,
2369  ptrdiff_t stride)
2370 {
2371  uint8_t full[336];
2372  uint8_t * const full_mid= full + 32;
2373  uint8_t halfH[256];
2374  uint8_t halfV[256];
2375  put_h264_qpel16_h_lowpass_mmi(halfH, src, 16, stride);
2376  copy_block16_mmi(full, src - stride*2, 16, stride, 21);
2377  put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
2378  put_pixels16_l2_mmi(dst, halfH, halfV, stride, 16, 16, 16);
2379 }
2380 
2381 void ff_put_h264_qpel16_mc31_mmi(uint8_t *dst, const uint8_t *src,
2382  ptrdiff_t stride)
2383 {
2384  uint8_t full[336];
2385  uint8_t * const full_mid= full + 32;
2386  uint8_t halfH[256];
2387  uint8_t halfV[256];
2388  put_h264_qpel16_h_lowpass_mmi(halfH, src, 16, stride);
2389  copy_block16_mmi(full, src - stride*2 + 1, 16, stride, 21);
2390  put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
2391  put_pixels16_l2_mmi(dst, halfH, halfV, stride, 16, 16, 16);
2392 }
2393 
2394 void ff_put_h264_qpel16_mc13_mmi(uint8_t *dst, const uint8_t *src,
2395  ptrdiff_t stride)
2396 {
2397  uint8_t full[336];
2398  uint8_t * const full_mid= full + 32;
2399  uint8_t halfH[256];
2400  uint8_t halfV[256];
2401  put_h264_qpel16_h_lowpass_mmi(halfH, src + stride, 16, stride);
2402  copy_block16_mmi(full, src - stride*2, 16, stride, 21);
2403  put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
2404  put_pixels16_l2_mmi(dst, halfH, halfV, stride, 16, 16, 16);
2405 }
2406 
2407 void ff_put_h264_qpel16_mc33_mmi(uint8_t *dst, const uint8_t *src,
2408  ptrdiff_t stride)
2409 {
2410  uint8_t full[336];
2411  uint8_t * const full_mid= full + 32;
2412  uint8_t halfH[256];
2413  uint8_t halfV[256];
2414  put_h264_qpel16_h_lowpass_mmi(halfH, src + stride, 16, stride);
2415  copy_block16_mmi(full, src - stride*2 + 1, 16, stride, 21);
2416  put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
2417  put_pixels16_l2_mmi(dst, halfH, halfV, stride, 16, 16, 16);
2418 }
2419 
2420 void ff_put_h264_qpel16_mc22_mmi(uint8_t *dst, const uint8_t *src,
2421  ptrdiff_t stride)
2422 {
2423  put_h264_qpel16_hv_lowpass_mmi(dst, src, stride, stride);
2424 }
2425 
2426 void ff_put_h264_qpel16_mc21_mmi(uint8_t *dst, const uint8_t *src,
2427  ptrdiff_t stride)
2428 {
2429  uint8_t halfH[256];
2430  uint8_t halfHV[256];
2431  put_h264_qpel16_h_lowpass_mmi(halfH, src, 16, stride);
2432  put_h264_qpel16_hv_lowpass_mmi(halfHV, src, 16, stride);
2433  put_pixels16_l2_mmi(dst, halfH, halfHV, stride, 16, 16, 16);
2434 }
2435 
2436 void ff_put_h264_qpel16_mc23_mmi(uint8_t *dst, const uint8_t *src,
2437  ptrdiff_t stride)
2438 {
2439  uint8_t halfH[256];
2440  uint8_t halfHV[256];
2441  put_h264_qpel16_h_lowpass_mmi(halfH, src + stride, 16, stride);
2442  put_h264_qpel16_hv_lowpass_mmi(halfHV, src, 16, stride);
2443  put_pixels16_l2_mmi(dst, halfH, halfHV, stride, 16, 16, 16);
2444 }
2445 
2446 void ff_put_h264_qpel16_mc12_mmi(uint8_t *dst, const uint8_t *src,
2447  ptrdiff_t stride)
2448 {
2449  uint8_t full[336];
2450  uint8_t * const full_mid= full + 32;
2451  uint8_t halfV[256];
2452  uint8_t halfHV[256];
2453  copy_block16_mmi(full, src - stride*2, 16, stride, 21);
2454  put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
2455  put_h264_qpel16_hv_lowpass_mmi(halfHV, src, 16, stride);
2456  put_pixels16_l2_mmi(dst, halfV, halfHV, stride, 16, 16, 16);
2457 }
2458 
2459 void ff_put_h264_qpel16_mc32_mmi(uint8_t *dst, const uint8_t *src,
2460  ptrdiff_t stride)
2461 {
2462  uint8_t full[336];
2463  uint8_t * const full_mid= full + 32;
2464  uint8_t halfV[256];
2465  uint8_t halfHV[256];
2466  copy_block16_mmi(full, src - stride*2 + 1, 16, stride, 21);
2467  put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
2468  put_h264_qpel16_hv_lowpass_mmi(halfHV, src, 16, stride);
2469  put_pixels16_l2_mmi(dst, halfV, halfHV, stride, 16, 16, 16);
2470 }
2471 
2472 //DEF_H264_MC_MMI(avg_, 16)
2473 void ff_avg_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src,
2474  ptrdiff_t stride)
2475 {
2476  avg_pixels16_mmi(dst, src, stride, 16);
2477 }
2478 
2479 void ff_avg_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src,
2480  ptrdiff_t stride)
2481 {
2482  uint8_t half[256];
2483  put_h264_qpel16_h_lowpass_mmi(half, src, 16, stride);
2484  avg_pixels16_l2_mmi(dst, src, half, stride, stride, 16, 16);
2485 }
2486 
2487 void ff_avg_h264_qpel16_mc20_mmi(uint8_t *dst, const uint8_t *src,
2488  ptrdiff_t stride)
2489 {
2490  avg_h264_qpel16_h_lowpass_mmi(dst, src, stride, stride);
2491 }
2492 
2493 void ff_avg_h264_qpel16_mc30_mmi(uint8_t *dst, const uint8_t *src,
2494  ptrdiff_t stride)
2495 {
2496  uint8_t half[256];
2497  put_h264_qpel16_h_lowpass_mmi(half, src, 16, stride);
2498  avg_pixels16_l2_mmi(dst, src+1, half, stride, stride, 16, 16);
2499 }
2500 
2501 void ff_avg_h264_qpel16_mc01_mmi(uint8_t *dst, const uint8_t *src,
2502  ptrdiff_t stride)
2503 {
2504  uint8_t full[336];
2505  uint8_t * const full_mid= full + 32;
2506  uint8_t half[256];
2507  copy_block16_mmi(full, src - stride*2, 16, stride, 21);
2508  put_h264_qpel16_v_lowpass_mmi(half, full_mid, 16, 16);
2509  avg_pixels16_l2_mmi(dst, full_mid, half, stride, 16, 16, 16);
2510 }
2511 
2512 void ff_avg_h264_qpel16_mc02_mmi(uint8_t *dst, const uint8_t *src,
2513  ptrdiff_t stride)
2514 {
2515  uint8_t full[336];
2516  uint8_t * const full_mid= full + 32;
2517  copy_block16_mmi(full, src - stride*2, 16, stride, 21);
2518  avg_h264_qpel16_v_lowpass_mmi(dst, full_mid, stride, 16);
2519 }
2520 
2521 void ff_avg_h264_qpel16_mc03_mmi(uint8_t *dst, const uint8_t *src,
2522  ptrdiff_t stride)
2523 {
2524  uint8_t full[336];
2525  uint8_t * const full_mid= full + 32;
2526  uint8_t half[256];
2527  copy_block16_mmi(full, src - stride*2, 16, stride, 21);
2528  put_h264_qpel16_v_lowpass_mmi(half, full_mid, 16, 16);
2529  avg_pixels16_l2_mmi(dst, full_mid+16, half, stride, 16, 16, 16);
2530 }
2531 
2532 void ff_avg_h264_qpel16_mc11_mmi(uint8_t *dst, const uint8_t *src,
2533  ptrdiff_t stride)
2534 {
2535  uint8_t full[336];
2536  uint8_t * const full_mid= full + 32;
2537  uint8_t halfH[256];
2538  uint8_t halfV[256];
2539  put_h264_qpel16_h_lowpass_mmi(halfH, src, 16, stride);
2540  copy_block16_mmi(full, src - stride*2, 16, stride, 21);
2541  put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
2542  avg_pixels16_l2_mmi(dst, halfH, halfV, stride, 16, 16, 16);
2543 }
2544 
2545 void ff_avg_h264_qpel16_mc31_mmi(uint8_t *dst, const uint8_t *src,
2546  ptrdiff_t stride)
2547 {
2548  uint8_t full[336];
2549  uint8_t * const full_mid= full + 32;
2550  uint8_t halfH[256];
2551  uint8_t halfV[256];
2552  put_h264_qpel16_h_lowpass_mmi(halfH, src, 16, stride);
2553  copy_block16_mmi(full, src - stride*2 + 1, 16, stride, 21);
2554  put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
2555  avg_pixels16_l2_mmi(dst, halfH, halfV, stride, 16, 16, 16);
2556 }
2557 
2558 void ff_avg_h264_qpel16_mc13_mmi(uint8_t *dst, const uint8_t *src,
2559  ptrdiff_t stride)
2560 {
2561  uint8_t full[336];
2562  uint8_t * const full_mid= full + 32;
2563  uint8_t halfH[256];
2564  uint8_t halfV[256];
2565  put_h264_qpel16_h_lowpass_mmi(halfH, src + stride, 16, stride);
2566  copy_block16_mmi(full, src - stride*2, 16, stride, 21);
2567  put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
2568  avg_pixels16_l2_mmi(dst, halfH, halfV, stride, 16, 16, 16);
2569 }
2570 
2571 void ff_avg_h264_qpel16_mc33_mmi(uint8_t *dst, const uint8_t *src,
2572  ptrdiff_t stride)
2573 {
2574  uint8_t full[336];
2575  uint8_t * const full_mid= full + 32;
2576  uint8_t halfH[256];
2577  uint8_t halfV[256];
2578  put_h264_qpel16_h_lowpass_mmi(halfH, src + stride, 16, stride);
2579  copy_block16_mmi(full, src - stride*2 + 1, 16, stride, 21);
2580  put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
2581  avg_pixels16_l2_mmi(dst, halfH, halfV, stride, 16, 16, 16);
2582 }
2583 
2584 void ff_avg_h264_qpel16_mc22_mmi(uint8_t *dst, const uint8_t *src,
2585  ptrdiff_t stride)
2586 {
2587  avg_h264_qpel16_hv_lowpass_mmi(dst, src, stride, stride);
2588 }
2589 
2590 void ff_avg_h264_qpel16_mc21_mmi(uint8_t *dst, const uint8_t *src,
2591  ptrdiff_t stride)
2592 {
2593  uint8_t halfH[256];
2594  uint8_t halfHV[256];
2595  put_h264_qpel16_h_lowpass_mmi(halfH, src, 16, stride);
2596  put_h264_qpel16_hv_lowpass_mmi(halfHV, src, 16, stride);
2597  avg_pixels16_l2_mmi(dst, halfH, halfHV, stride, 16, 16, 16);
2598 }
2599 
2600 void ff_avg_h264_qpel16_mc23_mmi(uint8_t *dst, const uint8_t *src,
2601  ptrdiff_t stride)
2602 {
2603  uint8_t halfH[256];
2604  uint8_t halfHV[256];
2605  put_h264_qpel16_h_lowpass_mmi(halfH, src + stride, 16, stride);
2606  put_h264_qpel16_hv_lowpass_mmi(halfHV, src, 16, stride);
2607  avg_pixels16_l2_mmi(dst, halfH, halfHV, stride, 16, 16, 16);
2608 }
2609 
2610 void ff_avg_h264_qpel16_mc12_mmi(uint8_t *dst, const uint8_t *src,
2611  ptrdiff_t stride)
2612 {
2613  uint8_t full[336];
2614  uint8_t * const full_mid= full + 32;
2615  uint8_t halfV[256];
2616  uint8_t halfHV[256];
2617  copy_block16_mmi(full, src - stride*2, 16, stride, 21);
2618  put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
2619  put_h264_qpel16_hv_lowpass_mmi(halfHV, src, 16, stride);
2620  avg_pixels16_l2_mmi(dst, halfV, halfHV, stride, 16, 16, 16);
2621 }
2622 
2623 void ff_avg_h264_qpel16_mc32_mmi(uint8_t *dst, const uint8_t *src,
2624  ptrdiff_t stride)
2625 {
2626  uint8_t full[336];
2627  uint8_t * const full_mid= full + 32;
2628  uint8_t halfV[256];
2629  uint8_t halfHV[256];
2630  copy_block16_mmi(full, src - stride*2 + 1, 16, stride, 21);
2631  put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
2632  put_h264_qpel16_hv_lowpass_mmi(halfHV, src, 16, stride);
2633  avg_pixels16_l2_mmi(dst, halfV, halfHV, stride, 16, 16, 16);
2634 }
2635 
2636 #undef op2_avg
2637 #undef op2_put
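
These ff_*_mmi entry points are declared in h264dsp_mips.h and are meant to be installed into the decoder's qpel function tables by the MIPS init code, not by this file. The snippet below is only a sketch of that wiring; the table layout (outer index 0 = 16x16, 1 = 8x8, 2 = 4x4, inner index = 4*dy + dx) and the include paths are assumptions, and the real assignments live in the init file.

#include "libavcodec/h264qpel.h"  /* H264QpelContext; include path assumed            */
#include "h264dsp_mips.h"         /* prototypes of the ff_*_mmi functions listed above */

static void wire_up_qpel_mmi_sketch(H264QpelContext *c)
{
    c->put_h264_qpel_pixels_tab[0][ 0] = ff_put_h264_qpel16_mc00_mmi; /* 16x16, dx=0 dy=0 */
    c->put_h264_qpel_pixels_tab[0][ 2] = ff_put_h264_qpel16_mc20_mmi; /* 16x16, dx=2 dy=0 */
    c->put_h264_qpel_pixels_tab[0][10] = ff_put_h264_qpel16_mc22_mmi; /* 16x16, dx=2 dy=2 */
    c->avg_h264_qpel_pixels_tab[1][ 0] = ff_avg_h264_qpel8_mc00_mmi;  /*  8x8,  dx=0 dy=0 */
    c->avg_h264_qpel_pixels_tab[2][ 8] = ff_avg_h264_qpel4_mc02_mmi;  /*  4x4,  dx=0 dy=2 */
}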