FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
hpeldsp_mmi.c
Go to the documentation of this file.
1 /*
2  * Loongson SIMD optimized hpeldsp
3  *
4  * Copyright (c) 2016 Loongson Technology Corporation Limited
5  * Copyright (c) 2016 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "hpeldsp_mips.h"
26 #include "libavutil/mips/asmdefs.h"
27 #include "constants.h"
28 
30  ptrdiff_t line_size, int h)
31 {
32  double ftmp[2];
33  mips_reg addr[2];
34  uint64_t low32;
35 
36  __asm__ volatile (
37  PTR_ADDU "%[addr1], %[line_size], %[line_size] \n\t"
38  "1: \n\t"
39  PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
40  "uld %[low32], 0x00(%[pixels]) \n\t"
41  "mtc1 %[low32], %[ftmp0] \n\t"
42  "uld %[low32], 0x00(%[addr0]) \n\t"
43  "mtc1 %[low32], %[ftmp1] \n\t"
44  "swc1 %[ftmp0], 0x00(%[block]) \n\t"
45  "gsswxc1 %[ftmp1], 0x00(%[block], %[line_size]) \n\t"
46  PTR_ADDU "%[pixels], %[pixels], %[addr1] \n\t"
47  PTR_ADDU "%[block], %[block], %[addr1] \n\t"
48 
49  PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
50  "uld %[low32], 0x00(%[pixels]) \n\t"
51  "mtc1 %[low32], %[ftmp0] \n\t"
52  "uld %[low32], 0x00(%[addr0]) \n\t"
53  "mtc1 %[low32], %[ftmp1] \n\t"
54  "swc1 %[ftmp0], 0x00(%[block]) \n\t"
55  "gsswxc1 %[ftmp1], 0x00(%[block], %[line_size]) \n\t"
56  PTR_ADDU "%[pixels], %[pixels], %[addr1] \n\t"
57  PTR_ADDU "%[block], %[block], %[addr1] \n\t"
58 
59  PTR_ADDI "%[h], %[h], -0x04 \n\t"
60  "bnez %[h], 1b \n\t"
61  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
62  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
63  [low32]"=&r"(low32),
64  [block]"+&r"(block), [pixels]"+&r"(pixels),
65  [h]"+&r"(h)
66  : [line_size]"r"((mips_reg)line_size)
67  : "memory"
68  );
69 }
70 
72  ptrdiff_t line_size, int h)
73 {
74  double ftmp[2];
75  mips_reg addr[2];
76 
77  __asm__ volatile (
78  PTR_ADDU "%[addr1], %[line_size], %[line_size] \n\t"
79  "1: \n\t"
80  "gsldlc1 %[ftmp0], 0x07(%[pixels]) \n\t"
81  PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
82  "gsldrc1 %[ftmp0], 0x00(%[pixels]) \n\t"
83  "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
84  "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
85  "sdc1 %[ftmp0], 0x00(%[block]) \n\t"
86  "gssdxc1 %[ftmp1], 0x00(%[block], %[line_size]) \n\t"
87  PTR_ADDU "%[pixels], %[pixels], %[addr1] \n\t"
88  PTR_ADDU "%[block], %[block], %[addr1] \n\t"
89 
90  "gsldlc1 %[ftmp0], 0x07(%[pixels]) \n\t"
91  PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
92  "gsldrc1 %[ftmp0], 0x00(%[pixels]) \n\t"
93  "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
94  "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
95  "sdc1 %[ftmp0], 0x00(%[block]) \n\t"
96  "gssdxc1 %[ftmp1], 0x00(%[block], %[line_size]) \n\t"
97  PTR_ADDU "%[pixels], %[pixels], %[addr1] \n\t"
98  PTR_ADDU "%[block], %[block], %[addr1] \n\t"
99 
100  PTR_ADDI "%[h], %[h], -0x04 \n\t"
101  "bnez %[h], 1b \n\t"
102  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
103  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
104  [block]"+&r"(block), [pixels]"+&r"(pixels),
105  [h]"+&r"(h)
106  : [line_size]"r"((mips_reg)line_size)
107  : "memory"
108  );
109 }
110 
112  ptrdiff_t line_size, int h)
113 {
114  double ftmp[4];
115  mips_reg addr[2];
116 
117  __asm__ volatile (
118  PTR_ADDU "%[addr1], %[line_size], %[line_size] \n\t"
119  "1: \n\t"
120  "gsldlc1 %[ftmp0], 0x07(%[pixels]) \n\t"
121  PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
122  "gsldrc1 %[ftmp0], 0x00(%[pixels]) \n\t"
123  "gsldlc1 %[ftmp2], 0x0f(%[pixels]) \n\t"
124  "gsldrc1 %[ftmp2], 0x08(%[pixels]) \n\t"
125  "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
126  "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
127  "gsldlc1 %[ftmp3], 0x0f(%[addr0]) \n\t"
128  "gsldrc1 %[ftmp3], 0x08(%[addr0]) \n\t"
129  "sdc1 %[ftmp0], 0x00(%[block]) \n\t"
130  "gssdxc1 %[ftmp1], 0x00(%[block], %[line_size]) \n\t"
131  "sdc1 %[ftmp2], 0x08(%[block]) \n\t"
132  "gssdxc1 %[ftmp3], 0x08(%[block], %[line_size]) \n\t"
133  PTR_ADDU "%[pixels], %[pixels], %[addr1] \n\t"
134  PTR_ADDU "%[block], %[block], %[addr1] \n\t"
135 
136  "gsldlc1 %[ftmp0], 0x07(%[pixels]) \n\t"
137  PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
138  "gsldrc1 %[ftmp0], 0x00(%[pixels]) \n\t"
139  "gsldlc1 %[ftmp2], 0x0f(%[pixels]) \n\t"
140  "gsldrc1 %[ftmp2], 0x08(%[pixels]) \n\t"
141  "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
142  "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
143  "gsldlc1 %[ftmp3], 0x0f(%[addr0]) \n\t"
144  "gsldrc1 %[ftmp3], 0x08(%[addr0]) \n\t"
145  "sdc1 %[ftmp0], 0x00(%[block]) \n\t"
146  "gssdxc1 %[ftmp1], 0x00(%[block], %[line_size]) \n\t"
147  "sdc1 %[ftmp2], 0x08(%[block]) \n\t"
148  "gssdxc1 %[ftmp3], 0x08(%[block], %[line_size]) \n\t"
149  PTR_ADDU "%[pixels], %[pixels], %[addr1] \n\t"
150  PTR_ADDU "%[block], %[block], %[addr1] \n\t"
151 
152  PTR_ADDI "%[h], %[h], -0x04 \n\t"
153  "bnez %[h], 1b \n\t"
154  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
155  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
156  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
157  [block]"+&r"(block), [pixels]"+&r"(pixels),
158  [h]"+&r"(h)
159  : [line_size]"r"((mips_reg)line_size)
160  : "memory"
161  );
162 }
163 
165  ptrdiff_t line_size, int h)
166 {
167  double ftmp[4];
168  mips_reg addr[3];
169  uint64_t low32;
170 
171  __asm__ volatile (
172  PTR_ADDU "%[addr2], %[line_size], %[line_size] \n\t"
173  "1: \n\t"
174  PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
175  "uld %[low32], 0x00(%[pixels]) \n\t"
176  "mtc1 %[low32], %[ftmp0] \n\t"
177  "uld %[low32], 0x00(%[addr0]) \n\t"
178  "mtc1 %[low32], %[ftmp1] \n\t"
179  PTR_ADDU "%[addr1], %[block], %[line_size] \n\t"
180  "uld %[low32], 0x00(%[block]) \n\t"
181  "mtc1 %[low32], %[ftmp2] \n\t"
182  "uld %[low32], 0x00(%[addr1]) \n\t"
183  "mtc1 %[low32], %[ftmp3] \n\t"
184  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
185  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
186  "swc1 %[ftmp0], 0x00(%[block]) \n\t"
187  "gsswxc1 %[ftmp1], 0x00(%[block], %[line_size]) \n\t"
188  PTR_ADDU "%[pixels], %[pixels], %[addr2] \n\t"
189  PTR_ADDU "%[block], %[block], %[addr2] \n\t"
190 
191  PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
192  "uld %[low32], 0x00(%[pixels]) \n\t"
193  "mtc1 %[low32], %[ftmp0] \n\t"
194  "uld %[low32], 0x00(%[addr0]) \n\t"
195  "mtc1 %[low32], %[ftmp1] \n\t"
196  PTR_ADDU "%[addr1], %[block], %[line_size] \n\t"
197  "uld %[low32], 0x00(%[block]) \n\t"
198  "mtc1 %[low32], %[ftmp2] \n\t"
199  "uld %[low32], 0x00(%[addr1]) \n\t"
200  "mtc1 %[low32], %[ftmp3] \n\t"
201  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
202  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
203  "swc1 %[ftmp0], 0x00(%[block]) \n\t"
204  "gsswxc1 %[ftmp1], 0x00(%[block], %[line_size]) \n\t"
205  PTR_ADDU "%[pixels], %[pixels], %[addr2] \n\t"
206  PTR_ADDU "%[block], %[block], %[addr2] \n\t"
207 
208  PTR_ADDI "%[h], %[h], -0x04 \n\t"
209  "bnez %[h], 1b \n\t"
210  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
211  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
212  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
213  [addr2]"=&r"(addr[2]),
214  [low32]"=&r"(low32),
215  [block]"+&r"(block), [pixels]"+&r"(pixels),
216  [h]"+&r"(h)
217  : [line_size]"r"((mips_reg)line_size)
218  : "memory"
219  );
220 }
221 
223  ptrdiff_t line_size, int h)
224 {
225  double ftmp[4];
226  mips_reg addr[3];
227 
228  __asm__ volatile (
229  PTR_ADDU "%[addr2], %[line_size], %[line_size] \n\t"
230  "1: \n\t"
231  "gsldlc1 %[ftmp0], 0x07(%[pixels]) \n\t"
232  PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
233  "gsldrc1 %[ftmp0], 0x00(%[pixels]) \n\t"
234  "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
235  PTR_ADDU "%[addr1], %[block], %[line_size] \n\t"
236  "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
237  "gsldlc1 %[ftmp2], 0x07(%[block]) \n\t"
238  "gsldrc1 %[ftmp2], 0x00(%[block]) \n\t"
239  "gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
240  "gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
241  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
242  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
243  "sdc1 %[ftmp0], 0x00(%[block]) \n\t"
244  "gssdxc1 %[ftmp1], 0x00(%[block], %[line_size]) \n\t"
245  PTR_ADDU "%[pixels], %[pixels], %[addr2] \n\t"
246  PTR_ADDU "%[block], %[block], %[addr2] \n\t"
247 
248  "gsldlc1 %[ftmp0], 0x07(%[pixels]) \n\t"
249  PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
250  "gsldrc1 %[ftmp0], 0x00(%[pixels]) \n\t"
251  "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
252  PTR_ADDU "%[addr1], %[block], %[line_size] \n\t"
253  "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
254  "gsldlc1 %[ftmp2], 0x07(%[block]) \n\t"
255  "gsldrc1 %[ftmp2], 0x00(%[block]) \n\t"
256  "gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
257  "gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
258  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
259  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
260  "sdc1 %[ftmp0], 0x00(%[block]) \n\t"
261  "gssdxc1 %[ftmp1], 0x00(%[block], %[line_size]) \n\t"
262  PTR_ADDU "%[pixels], %[pixels], %[addr2] \n\t"
263  PTR_ADDU "%[block], %[block], %[addr2] \n\t"
264 
265  PTR_ADDI "%[h], %[h], -0x04 \n\t"
266  "bnez %[h], 1b \n\t"
267  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
268  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
269  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
270  [addr2]"=&r"(addr[2]),
271  [block]"+&r"(block), [pixels]"+&r"(pixels),
272  [h]"+&r"(h)
273  : [line_size]"r"((mips_reg)line_size)
274  : "memory"
275  );
276 }
277 
279  ptrdiff_t line_size, int h)
280 {
281  double ftmp[8];
282  mips_reg addr[3];
283 
284  __asm__ volatile (
285  PTR_ADDU "%[addr2], %[line_size], %[line_size] \n\t"
286  "1: \n\t"
287  "gsldlc1 %[ftmp0], 0x07(%[pixels]) \n\t"
288  PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
289  "gsldrc1 %[ftmp0], 0x00(%[pixels]) \n\t"
290  "gsldlc1 %[ftmp4], 0x0f(%[pixels]) \n\t"
291  PTR_ADDU "%[addr1], %[block], %[line_size] \n\t"
292  "gsldrc1 %[ftmp4], 0x08(%[pixels]) \n\t"
293  "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
294  "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
295  "gsldlc1 %[ftmp5], 0x0f(%[addr0]) \n\t"
296  "gsldrc1 %[ftmp5], 0x08(%[addr0]) \n\t"
297  "gsldlc1 %[ftmp2], 0x07(%[block]) \n\t"
298  "gsldrc1 %[ftmp2], 0x00(%[block]) \n\t"
299  "gsldlc1 %[ftmp6], 0x0f(%[block]) \n\t"
300  "gsldrc1 %[ftmp6], 0x08(%[block]) \n\t"
301  "gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
302  "gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
303  "gsldlc1 %[ftmp7], 0x0f(%[addr1]) \n\t"
304  "gsldrc1 %[ftmp7], 0x08(%[addr1]) \n\t"
305  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
306  "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
307  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
308  "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
309  "sdc1 %[ftmp0], 0x00(%[block]) \n\t"
310  "gssdxc1 %[ftmp1], 0x00(%[block], %[line_size]) \n\t"
311  "sdc1 %[ftmp4], 0x08(%[block]) \n\t"
312  "gssdxc1 %[ftmp5], 0x08(%[block], %[line_size]) \n\t"
313  PTR_ADDU "%[pixels], %[pixels], %[addr2] \n\t"
314  PTR_ADDU "%[block], %[block], %[addr2] \n\t"
315 
316  "gsldlc1 %[ftmp0], 0x07(%[pixels]) \n\t"
317  PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
318  "gsldrc1 %[ftmp0], 0x00(%[pixels]) \n\t"
319  "gsldlc1 %[ftmp4], 0x0f(%[pixels]) \n\t"
320  PTR_ADDU "%[addr1], %[block], %[line_size] \n\t"
321  "gsldrc1 %[ftmp4], 0x08(%[pixels]) \n\t"
322  "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
323  "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
324  "gsldlc1 %[ftmp5], 0x0f(%[addr0]) \n\t"
325  "gsldrc1 %[ftmp5], 0x08(%[addr0]) \n\t"
326  "gsldlc1 %[ftmp2], 0x07(%[block]) \n\t"
327  "gsldrc1 %[ftmp2], 0x00(%[block]) \n\t"
328  "gsldlc1 %[ftmp6], 0x0f(%[block]) \n\t"
329  "gsldrc1 %[ftmp6], 0x08(%[block]) \n\t"
330  "gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
331  "gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
332  "gsldlc1 %[ftmp7], 0x0f(%[addr1]) \n\t"
333  "gsldrc1 %[ftmp7], 0x08(%[addr1]) \n\t"
334  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
335  "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
336  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
337  "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
338  "sdc1 %[ftmp0], 0x00(%[block]) \n\t"
339  "gssdxc1 %[ftmp1], 0x00(%[block], %[line_size]) \n\t"
340  "sdc1 %[ftmp4], 0x08(%[block]) \n\t"
341  "gssdxc1 %[ftmp5], 0x08(%[block], %[line_size]) \n\t"
342  PTR_ADDU "%[pixels], %[pixels], %[addr2] \n\t"
343  PTR_ADDU "%[block], %[block], %[addr2] \n\t"
344 
345  PTR_ADDI "%[h], %[h], -0x04 \n\t"
346  "bnez %[h], 1b \n\t"
347  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
348  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
349  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
350  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
351  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
352  [addr2]"=&r"(addr[2]),
353  [block]"+&r"(block), [pixels]"+&r"(pixels),
354  [h]"+&r"(h)
355  : [line_size]"r"((mips_reg)line_size)
356  : "memory"
357  );
358 }
359 
360 inline void ff_put_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
361  const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
362  int h)
363 {
364  double ftmp[4];
365  mips_reg addr[5];
366  uint64_t low32;
367 
368  __asm__ volatile (
369  PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
370  PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
371  PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
372  "1: \n\t"
373  PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
374  "uld %[low32], 0x00(%[src1]) \n\t"
375  "mtc1 %[low32], %[ftmp0] \n\t"
376  "uld %[low32], 0x00(%[addr0]) \n\t"
377  "mtc1 %[low32], %[ftmp1] \n\t"
378  "uld %[low32], 0x00(%[src2]) \n\t"
379  "mtc1 %[low32], %[ftmp2] \n\t"
380  PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
381  "uld %[low32], 0x00(%[addr1]) \n\t"
382  "mtc1 %[low32], %[ftmp3] \n\t"
383  PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
384  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
385  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
386  "swc1 %[ftmp0], 0x00(%[dst]) \n\t"
387  "gsswxc1 %[ftmp1], 0x00(%[dst], %[dst_stride]) \n\t"
388  PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
389  PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
390 
391  PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
392  "uld %[low32], 0x00(%[src1]) \n\t"
393  "mtc1 %[low32], %[ftmp0] \n\t"
394  "uld %[low32], 0x00(%[addr0]) \n\t"
395  "mtc1 %[low32], %[ftmp1] \n\t"
396  "uld %[low32], 0x00(%[src2]) \n\t"
397  "mtc1 %[low32], %[ftmp2] \n\t"
398  PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
399  "uld %[low32], 0x00(%[addr1]) \n\t"
400  "mtc1 %[low32], %[ftmp3] \n\t"
401  PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
402  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
403  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
404  "swc1 %[ftmp0], 0x00(%[dst]) \n\t"
405  "gsswxc1 %[ftmp1], 0x00(%[dst], %[dst_stride]) \n\t"
406  PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
407  PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
408 
409  PTR_ADDI "%[h], %[h], -0x04 \n\t"
410  "bnez %[h], 1b \n\t"
411  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
412  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
413  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
414  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
415  [addr4]"=&r"(addr[4]),
416  [low32]"=&r"(low32),
417  [dst]"+&r"(dst), [src1]"+&r"(src1),
418  [src2]"+&r"(src2), [h]"+&r"(h)
419  : [dst_stride]"r"((mips_reg)dst_stride),
420  [src_stride1]"r"((mips_reg)src_stride1),
421  [src_stride2]"r"((mips_reg)src_stride2)
422  : "memory"
423  );
424 }
425 
/*
 * Store the per-byte average (pavgb) of two 8-pixel-wide sources into dst.
 *
 * dst         - destination; row n is written at dst + n * dst_stride
 * src1, src2  - the two sources, read through gsldlc1/gsldrc1 pairs
 *               (presumably Loongson unaligned 64-bit loads -- confirm
 *               against the Loongson ISA manual)
 * dst_stride, src_stride1, src_stride2 - byte distance between rows
 * h           - row count; the loop subtracts 4 per pass and only exits
 *               when the counter is exactly 0, so h must be a positive
 *               multiple of 4
 */
426 inline void ff_put_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
427  const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
428  int h)
429 {
430  double ftmp[4];
431  mips_reg addr[5];
432 
433  __asm__ volatile (
 /* addr2/addr3/addr4 = twice the src1/src2/dst stride (one row pair) */
434  PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
435  PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
436  PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
437  "1: \n\t"
 /* first row pair: ftmp0/ftmp1 = src1 rows, ftmp2/ftmp3 = src2 rows */
438  "gsldlc1 %[ftmp0], 0x07(%[src1]) \n\t"
439  PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
440  "gsldrc1 %[ftmp0], 0x00(%[src1]) \n\t"
441  "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
442  "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
443  "gsldlc1 %[ftmp2], 0x07(%[src2]) \n\t"
444  PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
445  "gsldrc1 %[ftmp2], 0x00(%[src2]) \n\t"
446  "gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
447  PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
448  "gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
449  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
450  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
 /* row 0 goes to dst, row 1 to dst + dst_stride (indexed store) */
451  "sdc1 %[ftmp0], 0x00(%[dst]) \n\t"
452  "gssdxc1 %[ftmp1], 0x00(%[dst], %[dst_stride]) \n\t"
453  PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
454  PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
455 
 /* second row pair: same sequence, unrolled */
456  "gsldlc1 %[ftmp0], 0x07(%[src1]) \n\t"
457  PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
458  "gsldrc1 %[ftmp0], 0x00(%[src1]) \n\t"
459  "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
460  "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
461  "gsldlc1 %[ftmp2], 0x07(%[src2]) \n\t"
462  PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
463  "gsldrc1 %[ftmp2], 0x00(%[src2]) \n\t"
464  "gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
465  PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
466  "gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
467  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
468  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
469  "sdc1 %[ftmp0], 0x00(%[dst]) \n\t"
470  "gssdxc1 %[ftmp1], 0x00(%[dst], %[dst_stride]) \n\t"
471  PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
472  PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
473 
 /* four rows consumed; loop until the counter hits exactly zero */
474  PTR_ADDI "%[h], %[h], -0x04 \n\t"
475  "bnez %[h], 1b \n\t"
476  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
477  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
478  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
479  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
480  [addr4]"=&r"(addr[4]),
481  [dst]"+&r"(dst), [src1]"+&r"(src1),
482  [src2]"+&r"(src2), [h]"+&r"(h)
483  : [dst_stride]"r"((mips_reg)dst_stride),
484  [src_stride1]"r"((mips_reg)src_stride1),
485  [src_stride2]"r"((mips_reg)src_stride2)
486  : "memory"
487  );
488 }
489 
/*
 * Store the per-byte average (pavgb) of two 16-pixel-wide sources into dst.
 *
 * Every row is handled as two 8-byte halves (offsets 0x00 and 0x08).
 *
 * dst         - destination; row n is written at dst + n * dst_stride
 * src1, src2  - the two sources, read through gsldlc1/gsldrc1 pairs
 *               (presumably Loongson unaligned 64-bit loads -- confirm
 *               against the Loongson ISA manual)
 * dst_stride, src_stride1, src_stride2 - byte distance between rows
 * h           - row count; the loop subtracts 4 per pass and only exits
 *               when the counter is exactly 0, so h must be a positive
 *               multiple of 4
 */
490 inline void ff_put_pixels16_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
491  const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
492  int h)
493 {
494  double ftmp[8];
495  mips_reg addr[5];
496 
497  __asm__ volatile (
 /* addr2/addr3/addr4 = twice the src1/src2/dst stride (one row pair) */
498  PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
499  PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
500  PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
501  "1: \n\t"
 /*
  * first row pair:
  *   ftmp0/ftmp4 = src1 row 0 (low/high half), ftmp1/ftmp5 = src1 row 1
  *   ftmp2/ftmp6 = src2 row 0 (low/high half), ftmp3/ftmp7 = src2 row 1
  */
502  "gsldlc1 %[ftmp0], 0x07(%[src1]) \n\t"
503  PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
504  "gsldrc1 %[ftmp0], 0x00(%[src1]) \n\t"
505  "gsldlc1 %[ftmp4], 0x0f(%[src1]) \n\t"
506  "gsldrc1 %[ftmp4], 0x08(%[src1]) \n\t"
507  "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
508  "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
509  "gsldlc1 %[ftmp5], 0x0f(%[addr0]) \n\t"
510  "gsldrc1 %[ftmp5], 0x08(%[addr0]) \n\t"
511  "gsldlc1 %[ftmp2], 0x07(%[src2]) \n\t"
512  PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
513  "gsldrc1 %[ftmp2], 0x00(%[src2]) \n\t"
514  "gsldlc1 %[ftmp6], 0x0f(%[src2]) \n\t"
515  "gsldrc1 %[ftmp6], 0x08(%[src2]) \n\t"
516  "gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
517  PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
518  "gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
519  "gsldlc1 %[ftmp7], 0x0f(%[addr1]) \n\t"
520  "gsldrc1 %[ftmp7], 0x08(%[addr1]) \n\t"
521  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
522  "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
523  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
524  "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
 /* row 0 at dst, row 1 at dst + dst_stride; high halves at +0x08 */
525  "sdc1 %[ftmp0], 0x00(%[dst]) \n\t"
526  "gssdxc1 %[ftmp1], 0x00(%[dst], %[dst_stride]) \n\t"
527  "sdc1 %[ftmp4], 0x08(%[dst]) \n\t"
528  "gssdxc1 %[ftmp5], 0x08(%[dst], %[dst_stride]) \n\t"
529  PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
530  PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
531 
 /* second row pair: same sequence, unrolled */
532  "gsldlc1 %[ftmp0], 0x07(%[src1]) \n\t"
533  PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
534  "gsldrc1 %[ftmp0], 0x00(%[src1]) \n\t"
535  "gsldlc1 %[ftmp4], 0x0f(%[src1]) \n\t"
536  "gsldrc1 %[ftmp4], 0x08(%[src1]) \n\t"
537  "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
538  "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
539  "gsldlc1 %[ftmp5], 0x0f(%[addr0]) \n\t"
540  "gsldrc1 %[ftmp5], 0x08(%[addr0]) \n\t"
541  "gsldlc1 %[ftmp2], 0x07(%[src2]) \n\t"
542  PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
543  "gsldrc1 %[ftmp2], 0x00(%[src2]) \n\t"
544  "gsldlc1 %[ftmp6], 0x0f(%[src2]) \n\t"
545  "gsldrc1 %[ftmp6], 0x08(%[src2]) \n\t"
546  "gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
547  PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
548  "gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
549  "gsldlc1 %[ftmp7], 0x0f(%[addr1]) \n\t"
550  "gsldrc1 %[ftmp7], 0x08(%[addr1]) \n\t"
551  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
552  "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
553  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
554  "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
555  "sdc1 %[ftmp0], 0x00(%[dst]) \n\t"
556  "gssdxc1 %[ftmp1], 0x00(%[dst], %[dst_stride]) \n\t"
557  "sdc1 %[ftmp4], 0x08(%[dst]) \n\t"
558  "gssdxc1 %[ftmp5], 0x08(%[dst], %[dst_stride]) \n\t"
559  PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
560  PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
561 
 /* four rows consumed; loop until the counter hits exactly zero */
562  PTR_ADDI "%[h], %[h], -0x04 \n\t"
563  "bnez %[h], 1b \n\t"
564  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
565  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
566  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
567  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
568  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
569  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
570  [addr4]"=&r"(addr[4]),
571  [dst]"+&r"(dst), [src1]"+&r"(src1),
572  [src2]"+&r"(src2), [h]"+&r"(h)
573  : [dst_stride]"r"((mips_reg)dst_stride),
574  [src_stride1]"r"((mips_reg)src_stride1),
575  [src_stride2]"r"((mips_reg)src_stride2)
576  : "memory"
577  );
578 }
579 
580 inline void ff_avg_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
581  const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
582  int h)
583 {
584  double ftmp[6];
585  mips_reg addr[6];
586  uint64_t low32;
587 
588  __asm__ volatile (
589  PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
590  PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
591  PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
592  "1: \n\t"
593  PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
594  "uld %[low32], 0x00(%[src1]) \n\t"
595  "mtc1 %[low32], %[ftmp0] \n\t"
596  "uld %[low32], 0x00(%[addr0]) \n\t"
597  "mtc1 %[low32], %[ftmp1] \n\t"
598  "uld %[low32], 0x00(%[src2]) \n\t"
599  "mtc1 %[low32], %[ftmp2] \n\t"
600  PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
601  "uld %[low32], 0x00(%[addr1]) \n\t"
602  "mtc1 %[low32], %[ftmp3] \n\t"
603  PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
604  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
605  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
606  PTR_ADDU "%[addr5], %[dst], %[dst_stride] \n\t"
607  "uld %[low32], 0x00(%[dst]) \n\t"
608  "mtc1 %[low32], %[ftmp4] \n\t"
609  "uld %[low32], 0x00(%[addr5]) \n\t"
610  "mtc1 %[low32], %[ftmp5] \n\t"
611  "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
612  "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
613  "swc1 %[ftmp0], 0x00(%[dst]) \n\t"
614  "gsswxc1 %[ftmp1], 0x00(%[dst], %[dst_stride]) \n\t"
615  PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
616  PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
617 
618  PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
619  "uld %[low32], 0x00(%[src1]) \n\t"
620  "mtc1 %[low32], %[ftmp0] \n\t"
621  "uld %[low32], 0x00(%[addr0]) \n\t"
622  "mtc1 %[low32], %[ftmp1] \n\t"
623  "uld %[low32], 0x00(%[src2]) \n\t"
624  "mtc1 %[low32], %[ftmp2] \n\t"
625  PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
626  "uld %[low32], 0x00(%[addr1]) \n\t"
627  "mtc1 %[low32], %[ftmp3] \n\t"
628  PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
629  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
630  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
631  PTR_ADDU "%[addr5], %[dst], %[dst_stride] \n\t"
632  "uld %[low32], 0x00(%[dst]) \n\t"
633  "mtc1 %[low32], %[ftmp4] \n\t"
634  "uld %[low32], 0x00(%[addr5]) \n\t"
635  "mtc1 %[low32], %[ftmp5] \n\t"
636  "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
637  "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
638  "swc1 %[ftmp0], 0x00(%[dst]) \n\t"
639  "gsswxc1 %[ftmp1], 0x00(%[dst], %[dst_stride]) \n\t"
640  PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
641  PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
642 
643  PTR_ADDI "%[h], %[h], -0x04 \n\t"
644  "bnez %[h], 1b \n\t"
645  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
646  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
647  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
648  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
649  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
650  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
651  [low32]"=&r"(low32),
652  [dst]"+&r"(dst), [src1]"+&r"(src1),
653  [src2]"+&r"(src2), [h]"+&r"(h)
654  : [dst_stride]"r"((mips_reg)dst_stride),
655  [src_stride1]"r"((mips_reg)src_stride1),
656  [src_stride2]"r"((mips_reg)src_stride2)
657  : "memory"
658  );
659 }
660 
/*
 * Blend the per-byte average of two 8-pixel-wide sources into dst:
 * dst = pavgb(pavgb(src1, src2), dst).
 *
 * dst         - destination, read and rewritten; row n lives at
 *               dst + n * dst_stride
 * src1, src2  - the two sources, read through gsldlc1/gsldrc1 pairs
 *               (presumably Loongson unaligned 64-bit loads -- confirm
 *               against the Loongson ISA manual)
 * dst_stride, src_stride1, src_stride2 - byte distance between rows
 * h           - row count; the loop subtracts 4 per pass and only exits
 *               when the counter is exactly 0, so h must be a positive
 *               multiple of 4
 */
661 inline void ff_avg_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
662  const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
663  int h)
664 {
665  double ftmp[6];
666  mips_reg addr[6];
667 
668  __asm__ volatile (
 /* addr2/addr3/addr4 = twice the src1/src2/dst stride (one row pair) */
669  PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
670  PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
671  PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
672  "1: \n\t"
 /* first row pair: ftmp0/ftmp1 = src1 rows, ftmp2/ftmp3 = src2 rows */
673  "gsldlc1 %[ftmp0], 0x07(%[src1]) \n\t"
674  PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
675  "gsldrc1 %[ftmp0], 0x00(%[src1]) \n\t"
676  "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
677  "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
678  "gsldlc1 %[ftmp2], 0x07(%[src2]) \n\t"
679  PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
680  "gsldrc1 %[ftmp2], 0x00(%[src2]) \n\t"
681  "gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
682  PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
683  "gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
684  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
685  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
 /* ftmp4/ftmp5 = current dst rows, averaged into the src1/src2 result */
686  PTR_ADDU "%[addr5], %[dst], %[dst_stride] \n\t"
687  "gsldlc1 %[ftmp4], 0x07(%[dst]) \n\t"
688  "gsldrc1 %[ftmp4], 0x00(%[dst]) \n\t"
689  "gsldlc1 %[ftmp5], 0x07(%[addr5]) \n\t"
690  "gsldrc1 %[ftmp5], 0x00(%[addr5]) \n\t"
691  "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
692  "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
693  "sdc1 %[ftmp0], 0x00(%[dst]) \n\t"
694  "gssdxc1 %[ftmp1], 0x00(%[dst], %[dst_stride]) \n\t"
695  PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
696  PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
697 
 /* second row pair: same sequence, unrolled */
698  "gsldlc1 %[ftmp0], 0x07(%[src1]) \n\t"
699  PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
700  "gsldrc1 %[ftmp0], 0x00(%[src1]) \n\t"
701  "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
702  "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
703  "gsldlc1 %[ftmp2], 0x07(%[src2]) \n\t"
704  PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
705  "gsldrc1 %[ftmp2], 0x00(%[src2]) \n\t"
706  "gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
707  PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
708  "gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
709  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
710  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
711  PTR_ADDU "%[addr5], %[dst], %[dst_stride] \n\t"
712  "gsldlc1 %[ftmp4], 0x07(%[dst]) \n\t"
713  "gsldrc1 %[ftmp4], 0x00(%[dst]) \n\t"
714  "gsldlc1 %[ftmp5], 0x07(%[addr5]) \n\t"
715  "gsldrc1 %[ftmp5], 0x00(%[addr5]) \n\t"
716  "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
717  "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
718  "sdc1 %[ftmp0], 0x00(%[dst]) \n\t"
719  "gssdxc1 %[ftmp1], 0x00(%[dst], %[dst_stride]) \n\t"
720  PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
721  PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
722 
 /* four rows consumed; loop until the counter hits exactly zero */
723  PTR_ADDI "%[h], %[h], -0x04 \n\t"
724  "bnez %[h], 1b \n\t"
725  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
726  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
727  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
728  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
729  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
730  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
731  [dst]"+&r"(dst), [src1]"+&r"(src1),
732  [src2]"+&r"(src2), [h]"+&r"(h)
733  : [dst_stride]"r"((mips_reg)dst_stride),
734  [src_stride1]"r"((mips_reg)src_stride1),
735  [src_stride2]"r"((mips_reg)src_stride2)
736  : "memory"
737  );
738 }
739 
/**
 * 16-pixel-wide variant of ff_avg_pixels8_l2_8_mmi().
 *
 * The block is handled as two independent 8-pixel-wide columns, left one
 * first, each delegated to ff_avg_pixels8_l2_8_mmi() with the same strides
 * and row count.
 *
 * @param dst          destination, read and rewritten by the 8-wide helper
 * @param src1         first source
 * @param src2         second source
 * @param dst_stride   byte distance between destination rows
 * @param src_stride1  byte distance between rows of src1
 * @param src_stride2  byte distance between rows of src2
 * @param h            row count, forwarded unchanged to the helper
 */
inline void ff_avg_pixels16_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
    const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
    int h)
{
    int column;

    /* column = 0 is the left half, column = 8 the right half */
    for (column = 0; column < 16; column += 8) {
        ff_avg_pixels8_l2_8_mmi(dst + column, src1 + column, src2 + column,
                                dst_stride, src_stride1, src_stride2, h);
    }
}
749 
751  ptrdiff_t line_size, int h)
752 {
753  ff_put_pixels4_l2_8_mmi(block, pixels, pixels + 1, line_size, line_size,
754  line_size, h);
755 }
756 
758  ptrdiff_t line_size, int h)
759 {
760  ff_put_pixels8_l2_8_mmi(block, pixels, pixels + 1, line_size, line_size,
761  line_size, h);
762 }
763 
765  ptrdiff_t line_size, int h)
766 {
767  ff_put_pixels16_l2_8_mmi(block, pixels, pixels + 1, line_size, line_size,
768  line_size, h);
769 }
770 
772  ptrdiff_t line_size, int h)
773 {
774  ff_avg_pixels4_l2_8_mmi(block, pixels, pixels + 1, line_size, line_size,
775  line_size, h);
776 }
777 
779  ptrdiff_t line_size, int h)
780 {
781  ff_avg_pixels8_l2_8_mmi(block, pixels, pixels + 1, line_size, line_size,
782  line_size, h);
783 }
784 
786  ptrdiff_t line_size, int h)
787 {
788  ff_avg_pixels8_x2_8_mmi(block, pixels, line_size, h);
789  ff_avg_pixels8_x2_8_mmi(block + 8, pixels + 8, line_size, h);
790 }
791 
793  const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
794  int h)
795 {
796  double ftmp[5];
797  mips_reg addr[5];
798 
799  __asm__ volatile (
800  "pcmpeqb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
801  PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
802  PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
803  PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
804  "1: \n\t"
805  "gsldlc1 %[ftmp0], 0x07(%[src1]) \n\t"
806  PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
807  "gsldrc1 %[ftmp0], 0x00(%[src1]) \n\t"
808  "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
809  "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
810  "gsldlc1 %[ftmp2], 0x07(%[src2]) \n\t"
811  PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
812  "gsldrc1 %[ftmp2], 0x00(%[src2]) \n\t"
813  "gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
814  PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
815  "gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
816  "xor %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
817  "xor %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
818  "xor %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
819  "xor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
820  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
821  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
822  "xor %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
823  "xor %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
824  "sdc1 %[ftmp0], 0x00(%[dst]) \n\t"
825  "gssdxc1 %[ftmp1], 0x00(%[dst], %[dst_stride]) \n\t"
826  PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
827  PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
828 
829  "gsldlc1 %[ftmp0], 0x07(%[src1]) \n\t"
830  PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
831  "gsldrc1 %[ftmp0], 0x00(%[src1]) \n\t"
832  "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
833  "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
834  "gsldlc1 %[ftmp2], 0x07(%[src2]) \n\t"
835  PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
836  "gsldrc1 %[ftmp2], 0x00(%[src2]) \n\t"
837  "gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
838  PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
839  "gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
840  "xor %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
841  "xor %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
842  "xor %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
843  "xor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
844  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
845  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
846  "xor %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
847  "xor %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
848  "sdc1 %[ftmp0], 0x00(%[dst]) \n\t"
849  "gssdxc1 %[ftmp1], 0x00(%[dst], %[dst_stride]) \n\t"
850  PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
851  PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
852 
853  PTR_ADDI "%[h], %[h], -0x04 \n\t"
854  "bnez %[h], 1b \n\t"
855  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
856  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
857  [ftmp4]"=&f"(ftmp[4]),
858  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
859  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
860  [addr4]"=&r"(addr[4]),
861  [dst]"+&r"(dst), [src1]"+&r"(src1),
862  [src2]"+&r"(src2), [h]"+&r"(h)
863  : [dst_stride]"r"((mips_reg)dst_stride),
864  [src_stride1]"r"((mips_reg)src_stride1),
865  [src_stride2]"r"((mips_reg)src_stride2)
866  : "memory"
867  );
868 }
869 
/**
 * 8-pixel-wide "x2" case: combine each source row with the same row shifted
 * one byte to the right, using ff_put_no_rnd_pixels8_l2_8_mmi().
 *
 * @param block     destination
 * @param pixels    source; pixels + 1 is used as the second source
 * @param line_size stride used for the destination and both sources
 * @param h         number of rows, passed through unchanged
 */
void ff_put_no_rnd_pixels8_x2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    const uint8_t *right = pixels + 1;

    ff_put_no_rnd_pixels8_l2_8_mmi(block, pixels, right,
                                   line_size, line_size, line_size, h);
}
876 
/**
 * 16-pixel-wide variant of ff_put_no_rnd_pixels8_x2_8_mmi().
 *
 * Runs the 8-wide routine once per 8-pixel column (offset 0, then 8).
 *
 * @param block     destination
 * @param pixels    source
 * @param line_size stride passed through for both buffers
 * @param h         number of rows, passed through unchanged
 */
void ff_put_no_rnd_pixels16_x2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    int col;

    for (col = 0; col < 16; col += 8)
        ff_put_no_rnd_pixels8_x2_8_mmi(block + col, pixels + col,
                                       line_size, h);
}
883 
/**
 * 4-pixel-wide "y2" case: hand each source row together with the row one
 * stride below it to ff_put_pixels4_l2_8_mmi().
 *
 * @param block     destination
 * @param pixels    source; pixels + line_size is used as the second source
 * @param line_size stride used for the destination and both sources
 * @param h         number of rows, passed through unchanged
 */
void ff_put_pixels4_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    const uint8_t *below = pixels + line_size;

    ff_put_pixels4_l2_8_mmi(block, pixels, below,
                            line_size, line_size, line_size, h);
}
890 
/**
 * 8-pixel-wide "y2" case: hand each source row together with the row one
 * stride below it to ff_put_pixels8_l2_8_mmi().
 *
 * @param block     destination
 * @param pixels    source; pixels + line_size is used as the second source
 * @param line_size stride used for the destination and both sources
 * @param h         number of rows, passed through unchanged
 */
void ff_put_pixels8_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    const uint8_t *below = pixels + line_size;

    ff_put_pixels8_l2_8_mmi(block, pixels, below,
                            line_size, line_size, line_size, h);
}
897 
/**
 * 16-pixel-wide "y2" case: hand each source row together with the row one
 * stride below it to ff_put_pixels16_l2_8_mmi().
 *
 * @param block     destination
 * @param pixels    source; pixels + line_size is used as the second source
 * @param line_size stride used for the destination and both sources
 * @param h         number of rows, passed through unchanged
 */
void ff_put_pixels16_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    const uint8_t *below = pixels + line_size;

    ff_put_pixels16_l2_8_mmi(block, pixels, below,
                             line_size, line_size, line_size, h);
}
904 
/**
 * 4-pixel-wide "avg y2" case: hand each source row together with the row one
 * stride below it to ff_avg_pixels4_l2_8_mmi().
 *
 * @param block     destination, passed to the l2 routine as dst
 * @param pixels    source; pixels + line_size is used as the second source
 * @param line_size stride used for the destination and both sources
 * @param h         number of rows, passed through unchanged
 */
void ff_avg_pixels4_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    const uint8_t *below = pixels + line_size;

    ff_avg_pixels4_l2_8_mmi(block, pixels, below,
                            line_size, line_size, line_size, h);
}
911 
/**
 * 8-pixel-wide "avg y2" case: hand each source row together with the row one
 * stride below it to ff_avg_pixels8_l2_8_mmi().
 *
 * @param block     destination, passed to the l2 routine as dst
 * @param pixels    source; pixels + line_size is used as the second source
 * @param line_size stride used for the destination and both sources
 * @param h         number of rows, passed through unchanged
 */
void ff_avg_pixels8_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    const uint8_t *below = pixels + line_size;

    ff_avg_pixels8_l2_8_mmi(block, pixels, below,
                            line_size, line_size, line_size, h);
}
918 
/**
 * 16-pixel-wide variant of ff_avg_pixels8_y2_8_mmi().
 *
 * Runs the 8-wide routine once per 8-pixel column (offset 0, then 8).
 *
 * @param block     destination
 * @param pixels    source
 * @param line_size stride passed through for both buffers
 * @param h         number of rows, passed through unchanged
 */
void ff_avg_pixels16_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    int col;

    for (col = 0; col < 16; col += 8)
        ff_avg_pixels8_y2_8_mmi(block + col, pixels + col, line_size, h);
}
925 
/**
 * 8-pixel-wide "no_rnd y2" case: hand each source row together with the row
 * one stride below it to ff_put_no_rnd_pixels8_l2_8_mmi().
 *
 * @param block     destination
 * @param pixels    source; pixels + line_size is used as the second source
 * @param line_size stride used for the destination and both sources
 * @param h         number of rows, passed through unchanged
 */
void ff_put_no_rnd_pixels8_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    const uint8_t *below = pixels + line_size;

    ff_put_no_rnd_pixels8_l2_8_mmi(block, pixels, below,
                                   line_size, line_size, line_size, h);
}
932 
/**
 * 16-pixel-wide variant of ff_put_no_rnd_pixels8_y2_8_mmi().
 *
 * Runs the 8-wide routine once per 8-pixel column (offset 0, then 8).
 *
 * @param block     destination
 * @param pixels    source
 * @param line_size stride passed through for both buffers
 * @param h         number of rows, passed through unchanged
 */
void ff_put_no_rnd_pixels16_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    int col;

    for (col = 0; col < 16; col += 8)
        ff_put_no_rnd_pixels8_y2_8_mmi(block + col, pixels + col,
                                       line_size, h);
}
939 
/**
 * 4-pixel-wide "xy2" case computed on packed 32-bit words.
 *
 * For every output row, the 32-bit word at the row start and the word one
 * byte to the right are combined with the same two words of the row below.
 * Each byte lane is split into its low two bits and its upper six bits
 * (pre-shifted right by two); the parts are summed separately and a bias of
 * 2 per lane is added on the low-bit side before its final shift by two.
 *
 * @param block     destination; one 32-bit word is stored per row
 * @param pixels    source, read through AV_RN32 at pixels and pixels + 1
 * @param line_size stride applied to both block and pixels
 * @param h         row count; rows are produced two per loop pass
 */
void ff_put_pixels4_xy2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    /* FIXME HIGH BIT DEPTH */
    uint32_t here, next;        /* word at the row start / one byte right  */
    uint32_t lo_biased, hi_top; /* row sums that carry the +2 bias         */
    uint32_t lo_plain, hi_bot;  /* row sums of the in-between row, no bias */
    int rows_left = h;

    /* prime the biased sums with the first source row */
    here      = AV_RN32(pixels);
    next      = AV_RN32(pixels + 1);
    lo_biased = (here & 0x03030303UL) +
                (next & 0x03030303UL) +
                0x02020202UL;
    hi_top    = ((here & 0xFCFCFCFCUL) >> 2) +
                ((next & 0xFCFCFCFCUL) >> 2);
    pixels   += line_size;

    while (rows_left > 0) {
        /* first row of the pair: primed/previous row + this row */
        here     = AV_RN32(pixels);
        next     = AV_RN32(pixels + 1);
        lo_plain = (here & 0x03030303UL) +
                   (next & 0x03030303UL);
        hi_bot   = ((here & 0xFCFCFCFCUL) >> 2) +
                   ((next & 0xFCFCFCFCUL) >> 2);
        *((uint32_t *) block) = hi_top + hi_bot +
                                (((lo_biased + lo_plain) >> 2) & 0x0F0F0F0FUL);
        pixels += line_size;
        block  += line_size;

        /* second row of the pair: this row + the one below it */
        here      = AV_RN32(pixels);
        next      = AV_RN32(pixels + 1);
        lo_biased = (here & 0x03030303UL) +
                    (next & 0x03030303UL) +
                    0x02020202UL;
        hi_top    = ((here & 0xFCFCFCFCUL) >> 2) +
                    ((next & 0xFCFCFCFCUL) >> 2);
        *((uint32_t *) block) = hi_top + hi_bot +
                                (((lo_biased + lo_plain) >> 2) & 0x0F0F0F0FUL);
        pixels += line_size;
        block  += line_size;

        rows_left -= 2;
    }
}
977 
979  ptrdiff_t line_size, int h)
980 {
981 #if 1
982  double ftmp[10];
983  mips_reg addr[2];
984 
985  __asm__ volatile (
986  "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
987  "dli %[addr0], 0x0f \n\t"
988  "pcmpeqw %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
989  "dmtc1 %[addr0], %[ftmp8] \n\t"
990  "dli %[addr0], 0x01 \n\t"
991  "psrlh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
992  "dmtc1 %[addr0], %[ftmp8] \n\t"
993  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
994 
995  "dli %[addr0], 0x02 \n\t"
996  "gsldlc1 %[ftmp0], 0x07(%[pixels]) \n\t"
997  "gsldrc1 %[ftmp0], 0x00(%[pixels]) \n\t"
998  "dmtc1 %[addr0], %[ftmp9] \n\t"
999  "gsldlc1 %[ftmp4], 0x08(%[pixels]) \n\t"
1000  "gsldrc1 %[ftmp4], 0x01(%[pixels]) \n\t"
1001  "mov.d %[ftmp1], %[ftmp0] \n\t"
1002  "mov.d %[ftmp5], %[ftmp4] \n\t"
1003  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1004  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1005  "punpckhbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1006  "punpckhbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1007  "paddush %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1008  "paddush %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
1009  "xor %[addr0], %[addr0], %[addr0] \n\t"
1010  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
1011  ".p2align 3 \n\t"
1012  "1: \n\t"
1013  PTR_ADDU "%[addr1], %[pixels], %[addr0] \n\t"
1014  "gsldlc1 %[ftmp0], 0x07(%[addr1]) \n\t"
1015  "gsldrc1 %[ftmp0], 0x00(%[addr1]) \n\t"
1016  "gsldlc1 %[ftmp2], 0x08(%[addr1]) \n\t"
1017  "gsldrc1 %[ftmp2], 0x01(%[addr1]) \n\t"
1018  "mov.d %[ftmp1], %[ftmp0] \n\t"
1019  "mov.d %[ftmp3], %[ftmp2] \n\t"
1020  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1021  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1022  "punpckhbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1023  "punpckhbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1024  "paddush %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
1025  "paddush %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1026  "paddush %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1027  "paddush %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1028  "paddush %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1029  "paddush %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
1030  "psrlh %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
1031  "psrlh %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
1032  "packushb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1033  "gssdxc1 %[ftmp4], 0x00(%[block], %[addr0]) \n\t"
1034  PTR_ADDU "%[addr0], %[addr0], %[line_size] \n\t"
1035  PTR_ADDU "%[addr1], %[pixels], %[addr0] \n\t"
1036  "gsldlc1 %[ftmp2], 0x07(%[addr1]) \n\t"
1037  "gsldrc1 %[ftmp2], 0x00(%[addr1]) \n\t"
1038  "gsldlc1 %[ftmp4], 0x08(%[addr1]) \n\t"
1039  "gsldrc1 %[ftmp4], 0x01(%[addr1]) \n\t"
1040  "mov.d %[ftmp3], %[ftmp2] \n\t"
1041  "mov.d %[ftmp5], %[ftmp4] \n\t"
1042  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1043  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1044  "punpckhbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1045  "punpckhbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1046  "paddush %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
1047  "paddush %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
1048  "paddush %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
1049  "paddush %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1050  "paddush %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
1051  "paddush %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1052  "psrlh %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
1053  "psrlh %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
1054  "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1055  "gssdxc1 %[ftmp0], 0x00(%[block], %[addr0]) \n\t"
1056  PTR_ADDU "%[addr0], %[addr0], %[line_size] \n\t"
1057  PTR_ADDU "%[h], %[h], -0x02 \n\t"
1058  "bnez %[h], 1b \n\t"
1059  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1060  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1061  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1062  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1063  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1064  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
1065  [h]"+&r"(h), [pixels]"+&r"(pixels)
1066  : [block]"r"(block), [line_size]"r"((mips_reg)line_size)
1067  : "memory"
1068  );
1069 #else
1070  /* FIXME HIGH BIT DEPTH */
1071  int j;
1072 
1073  for (j = 0; j < 2; j++) {
1074  int i;
1075  const uint32_t a = AV_RN32(pixels);
1076  const uint32_t b = AV_RN32(pixels + 1);
1077  uint32_t l0 = (a & 0x03030303UL) +
1078  (b & 0x03030303UL) +
1079  0x02020202UL;
1080  uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) +
1081  ((b & 0xFCFCFCFCUL) >> 2);
1082  uint32_t l1, h1;
1083 
1084  pixels += line_size;
1085  for (i = 0; i < h; i += 2) {
1086  uint32_t a = AV_RN32(pixels);
1087  uint32_t b = AV_RN32(pixels + 1);
1088  l1 = (a & 0x03030303UL) +
1089  (b & 0x03030303UL);
1090  h1 = ((a & 0xFCFCFCFCUL) >> 2) +
1091  ((b & 0xFCFCFCFCUL) >> 2);
1092  *((uint32_t *) block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
1093  pixels += line_size;
1094  block += line_size;
1095  a = AV_RN32(pixels);
1096  b = AV_RN32(pixels + 1);
1097  l0 = (a & 0x03030303UL) +
1098  (b & 0x03030303UL) +
1099  0x02020202UL;
1100  h0 = ((a & 0xFCFCFCFCUL) >> 2) +
1101  ((b & 0xFCFCFCFCUL) >> 2);
1102  *((uint32_t *) block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
1103  pixels += line_size;
1104  block += line_size;
1105  }
1106  pixels += 4 - line_size * (h + 1);
1107  block += 4 - line_size * h;
1108  }
1109 #endif
1110 }
1111 
/**
 * 16-pixel-wide variant of ff_put_pixels8_xy2_8_mmi().
 *
 * Runs the 8-wide routine once per 8-pixel column (offset 0, then 8).
 *
 * @param block     destination
 * @param pixels    source
 * @param line_size stride passed through for both buffers
 * @param h         number of rows, passed through unchanged
 */
void ff_put_pixels16_xy2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    int col;

    for (col = 0; col < 16; col += 8)
        ff_put_pixels8_xy2_8_mmi(block + col, pixels + col, line_size, h);
}
1118 
/**
 * 4-pixel-wide "avg xy2" case computed on packed 32-bit words.
 *
 * The 2x2 neighbourhood value is formed exactly as in the "put" variant
 * (low two bits and upper six bits of every byte lane summed separately,
 * bias 2 per lane), then merged into the existing destination word with
 * rnd_avg32().
 *
 * @param block     destination; one 32-bit word is read and rewritten per row
 * @param pixels    source, read through AV_RN32 at pixels and pixels + 1
 * @param line_size stride applied to both block and pixels
 * @param h         row count; rows are produced two per loop pass
 */
void ff_avg_pixels4_xy2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    /* FIXME HIGH BIT DEPTH */
    uint32_t here, next;        /* word at the row start / one byte right  */
    uint32_t lo_biased, hi_top; /* row sums that carry the +2 bias         */
    uint32_t lo_plain, hi_bot;  /* row sums of the in-between row, no bias */
    int rows_left = h;

    /* prime the biased sums with the first source row */
    here      = AV_RN32(pixels);
    next      = AV_RN32(pixels + 1);
    lo_biased = (here & 0x03030303UL) +
                (next & 0x03030303UL) +
                0x02020202UL;
    hi_top    = ((here & 0xFCFCFCFCUL) >> 2) +
                ((next & 0xFCFCFCFCUL) >> 2);
    pixels   += line_size;

    while (rows_left > 0) {
        /* first row of the pair */
        here     = AV_RN32(pixels);
        next     = AV_RN32(pixels + 1);
        lo_plain = (here & 0x03030303UL) +
                   (next & 0x03030303UL);
        hi_bot   = ((here & 0xFCFCFCFCUL) >> 2) +
                   ((next & 0xFCFCFCFCUL) >> 2);
        *((uint32_t *) block) =
            rnd_avg32(*((uint32_t *) block),
                      hi_top + hi_bot +
                      (((lo_biased + lo_plain) >> 2) & 0x0F0F0F0FUL));
        pixels += line_size;
        block  += line_size;

        /* second row of the pair */
        here      = AV_RN32(pixels);
        next      = AV_RN32(pixels + 1);
        lo_biased = (here & 0x03030303UL) +
                    (next & 0x03030303UL) +
                    0x02020202UL;
        hi_top    = ((here & 0xFCFCFCFCUL) >> 2) +
                    ((next & 0xFCFCFCFCUL) >> 2);
        *((uint32_t *) block) =
            rnd_avg32(*((uint32_t *) block),
                      hi_top + hi_bot +
                      (((lo_biased + lo_plain) >> 2) & 0x0F0F0F0FUL));
        pixels += line_size;
        block  += line_size;

        rows_left -= 2;
    }
}
1156 
/**
 * 8-pixel-wide "avg xy2" case, done as two 4-pixel-wide passes on packed
 * 32-bit words.
 *
 * Each pass forms the 2x2 neighbourhood value per byte lane (low two bits
 * and upper six bits summed separately, bias 2 per lane) and merges it into
 * the destination word with rnd_avg32(). After a pass both pointers are
 * moved 4 bytes right and back up by the stated row counts.
 *
 * @param block     destination; one 32-bit word per row and pass is read and
 *                  rewritten
 * @param pixels    source, read through AV_RN32 at pixels and pixels + 1
 * @param line_size stride applied to both block and pixels
 * @param h         row count; rows are produced two per inner loop pass and
 *                  the pointer rewind uses h and h + 1 directly
 */
void ff_avg_pixels8_xy2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    /* FIXME HIGH BIT DEPTH */
    int half;

    for (half = 0; half < 2; half++) {
        uint32_t here, next;        /* word at the row start / one byte right */
        uint32_t lo_biased, hi_top; /* row sums that carry the +2 bias        */
        uint32_t lo_plain, hi_bot;  /* row sums of the in-between row         */
        int rows_left = h;

        /* prime the biased sums with the first source row */
        here      = AV_RN32(pixels);
        next      = AV_RN32(pixels + 1);
        lo_biased = (here & 0x03030303UL) +
                    (next & 0x03030303UL) +
                    0x02020202UL;
        hi_top    = ((here & 0xFCFCFCFCUL) >> 2) +
                    ((next & 0xFCFCFCFCUL) >> 2);
        pixels   += line_size;

        while (rows_left > 0) {
            /* first row of the pair */
            here     = AV_RN32(pixels);
            next     = AV_RN32(pixels + 1);
            lo_plain = (here & 0x03030303UL) +
                       (next & 0x03030303UL);
            hi_bot   = ((here & 0xFCFCFCFCUL) >> 2) +
                       ((next & 0xFCFCFCFCUL) >> 2);
            *((uint32_t *) block) =
                rnd_avg32(*((uint32_t *) block),
                          hi_top + hi_bot +
                          (((lo_biased + lo_plain) >> 2) & 0x0F0F0F0FUL));
            pixels += line_size;
            block  += line_size;

            /* second row of the pair */
            here      = AV_RN32(pixels);
            next      = AV_RN32(pixels + 1);
            lo_biased = (here & 0x03030303UL) +
                        (next & 0x03030303UL) +
                        0x02020202UL;
            hi_top    = ((here & 0xFCFCFCFCUL) >> 2) +
                        ((next & 0xFCFCFCFCUL) >> 2);
            *((uint32_t *) block) =
                rnd_avg32(*((uint32_t *) block),
                          hi_top + hi_bot +
                          (((lo_biased + lo_plain) >> 2) & 0x0F0F0F0FUL));
            pixels += line_size;
            block  += line_size;

            rows_left -= 2;
        }

        /* step 4 bytes right and rewind for the second pass */
        pixels += 4 - line_size * (h + 1);
        block  += 4 - line_size * h;
    }
}
1200 
/**
 * 16-pixel-wide variant of ff_avg_pixels8_xy2_8_mmi().
 *
 * Runs the 8-wide routine once per 8-pixel column (offset 0, then 8).
 *
 * @param block     destination
 * @param pixels    source
 * @param line_size stride passed through for both buffers
 * @param h         number of rows, passed through unchanged
 */
void ff_avg_pixels16_xy2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    int col;

    for (col = 0; col < 16; col += 8)
        ff_avg_pixels8_xy2_8_mmi(block + col, pixels + col, line_size, h);
}
1207 
/**
 * 8-pixel-wide "no_rnd xy2" case, done as two 4-pixel-wide passes on packed
 * 32-bit words.
 *
 * Same scheme as the rounding xy2 routines (low two bits and upper six bits
 * of every byte lane summed separately), but the per-lane bias added before
 * the final shift by two is 1 instead of 2. After a pass both pointers are
 * moved 4 bytes right and back up by the stated row counts.
 *
 * @param block     destination; one 32-bit word is stored per row and pass
 * @param pixels    source, read through AV_RN32 at pixels and pixels + 1
 * @param line_size stride applied to both block and pixels
 * @param h         row count; rows are produced two per inner loop pass and
 *                  the pointer rewind uses h and h + 1 directly
 */
void ff_put_no_rnd_pixels8_xy2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    /* FIXME HIGH BIT DEPTH */
    int half;

    for (half = 0; half < 2; half++) {
        uint32_t here, next;        /* word at the row start / one byte right */
        uint32_t lo_biased, hi_top; /* row sums that carry the +1 bias        */
        uint32_t lo_plain, hi_bot;  /* row sums of the in-between row         */
        int rows_left = h;

        /* prime the biased sums with the first source row */
        here      = AV_RN32(pixels);
        next      = AV_RN32(pixels + 1);
        lo_biased = (here & 0x03030303UL) +
                    (next & 0x03030303UL) +
                    0x01010101UL;
        hi_top    = ((here & 0xFCFCFCFCUL) >> 2) +
                    ((next & 0xFCFCFCFCUL) >> 2);
        pixels   += line_size;

        while (rows_left > 0) {
            /* first row of the pair */
            here     = AV_RN32(pixels);
            next     = AV_RN32(pixels + 1);
            lo_plain = (here & 0x03030303UL) +
                       (next & 0x03030303UL);
            hi_bot   = ((here & 0xFCFCFCFCUL) >> 2) +
                       ((next & 0xFCFCFCFCUL) >> 2);
            *((uint32_t *) block) =
                hi_top + hi_bot +
                (((lo_biased + lo_plain) >> 2) & 0x0F0F0F0FUL);
            pixels += line_size;
            block  += line_size;

            /* second row of the pair */
            here      = AV_RN32(pixels);
            next      = AV_RN32(pixels + 1);
            lo_biased = (here & 0x03030303UL) +
                        (next & 0x03030303UL) +
                        0x01010101UL;
            hi_top    = ((here & 0xFCFCFCFCUL) >> 2) +
                        ((next & 0xFCFCFCFCUL) >> 2);
            *((uint32_t *) block) =
                hi_top + hi_bot +
                (((lo_biased + lo_plain) >> 2) & 0x0F0F0F0FUL);
            pixels += line_size;
            block  += line_size;

            rows_left -= 2;
        }

        /* step 4 bytes right and rewind for the second pass */
        pixels += 4 - line_size * (h + 1);
        block  += 4 - line_size * h;
    }
}
1251 
/**
 * 16-pixel-wide variant of ff_put_no_rnd_pixels8_xy2_8_mmi().
 *
 * Runs the 8-wide routine once per 8-pixel column (offset 0, then 8).
 *
 * @param block     destination
 * @param pixels    source
 * @param line_size stride passed through for both buffers
 * @param h         number of rows, passed through unchanged
 */
void ff_put_no_rnd_pixels16_xy2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    int col;

    for (col = 0; col < 16; col += 8)
        ff_put_no_rnd_pixels8_xy2_8_mmi(block + col, pixels + col,
                                        line_size, h);
}
#define mips_reg
Definition: asmdefs.h:44
void ff_put_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:360
void ff_avg_pixels4_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:164
MIPS assembly defines from sys/asm.h but rewritten for use with C inline assembly (rather than from within .s files).
void ff_avg_pixels4_y2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:905
void ff_put_pixels8_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:978
void ff_avg_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:580
void ff_put_no_rnd_pixels8_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:1208
const char * b
Definition: vf_curves.c:109
void ff_avg_pixels4_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:1119
void ff_avg_pixels16_y2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:919
static int16_t block[64]
Definition: dct.c:113
void ff_put_pixels16_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:490
void ff_put_no_rnd_pixels16_x2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:877
uint8_t
void ff_put_pixels16_y2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:898
void ff_put_no_rnd_pixels8_x2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:870
#define PTR_ADDI
Definition: asmdefs.h:49
void ff_put_pixels4_x2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:750
void ff_avg_pixels8_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:1157
void ff_put_pixels8_y2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:891
void ff_put_no_rnd_pixels16_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:1252
void ff_put_pixels4_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:29
void ff_put_pixels4_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:940
void ff_avg_pixels16_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:278
void ff_put_no_rnd_pixels8_y2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:926
void ff_avg_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:661
void ff_put_pixels4_y2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:884
static uint32_t rnd_avg32(uint32_t a, uint32_t b)
Definition: rnd_avg.h:31
void ff_avg_pixels8_y2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:912
#define src1
Definition: h264pred.c:139
void ff_avg_pixels16_x2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:785
void ff_avg_pixels16_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:1201
void ff_put_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:426
#define AV_RN32(p)
Definition: intreadwrite.h:364
void ff_put_pixels16_x2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:764
void ff_put_no_rnd_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:792
void ff_avg_pixels4_x2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:771
void ff_avg_pixels8_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:222
void ff_put_pixels16_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:111
void ff_avg_pixels8_x2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:778
int pixels
Definition: avisynth_c.h:298
void ff_put_pixels8_x2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:757
void ff_avg_pixels16_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:740
void ff_put_no_rnd_pixels16_y2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:933
#define PTR_ADDU
Definition: asmdefs.h:47
void ff_put_pixels8_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:71
void ff_put_pixels16_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:1112