h264dsp_mmi.c
/*
 * Loongson SIMD optimized h264dsp
 *
 * Copyright (c) 2015 Loongson Technology Corporation Limited
 * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
 *                    Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
 *                    Heiher <r@hev.cc>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "constants.h"
#include "h264dsp_mips.h"

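/* Add a 4x4 block of 16-bit residuals to dst with unsigned saturation,
 * then clear the residual block. */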
void ff_h264_add_pixels4_8_mmi(uint8_t *dst, int16_t *src, int stride)
{
    __asm__ volatile (
        "xor $f0, $f0, $f0 \r\n"
        "ldc1 $f2, 0(%[src]) \r\n"
        "ldc1 $f4, 8(%[src]) \r\n"
        "ldc1 $f6, 16(%[src]) \r\n"
        "ldc1 $f8, 24(%[src]) \r\n"
        "lwc1 $f10, 0(%[dst0]) \r\n"
        "lwc1 $f12, 0(%[dst1]) \r\n"
        "lwc1 $f14, 0(%[dst2]) \r\n"
        "lwc1 $f16, 0(%[dst3]) \r\n"
        "punpcklbh $f10, $f10, $f0 \r\n"
        "punpcklbh $f12, $f12, $f0 \r\n"
        "punpcklbh $f14, $f14, $f0 \r\n"
        "punpcklbh $f16, $f16, $f0 \r\n"
        "paddh $f2, $f2, $f10 \r\n"
        "paddh $f4, $f4, $f12 \r\n"
        "paddh $f6, $f6, $f14 \r\n"
        "paddh $f8, $f8, $f16 \r\n"
        "packushb $f2, $f2, $f0 \r\n"
        "packushb $f4, $f4, $f0 \r\n"
        "packushb $f6, $f6, $f0 \r\n"
        "packushb $f8, $f8, $f0 \r\n"
        "swc1 $f2, 0(%[dst0]) \r\n"
        "swc1 $f4, 0(%[dst1]) \r\n"
        "swc1 $f6, 0(%[dst2]) \r\n"
        "swc1 $f8, 0(%[dst3]) \r\n"
        ::[dst0]"r"(dst),[dst1]"r"(dst+stride),[dst2]"r"(dst+2*stride),
          [dst3]"r"(dst+3*stride),[src]"r"(src)
        : "$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16"
    );

    memset(src, 0, 32);
}

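/* 4x4 inverse integer transform: horizontal pass, transpose, +32 rounding
 * bias, vertical pass with >>6, then saturated add to dst. Clears the
 * coefficient block afterwards. */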
void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
{
    __asm__ volatile (
        "dli $8, 1 \r\n"
        "ldc1 $f0, 0(%[block]) \r\n"
        "dmtc1 $8, $f16 \r\n"
        "ldc1 $f2, 8(%[block]) \r\n"
        "dli $8, 6 \r\n"
        "ldc1 $f4, 16(%[block]) \r\n"
        "dmtc1 $8, $f18 \r\n"
        "psrah $f8, $f2, $f16 \r\n"
        "ldc1 $f6, 24(%[block]) \r\n"
        "psrah $f10, $f6, $f16 \r\n"
        "psubh $f8, $f8, $f6 \r\n"
        "paddh $f10, $f10, $f2 \r\n"
        "paddh $f20, $f4, $f0 \r\n"
        "psubh $f0, $f0, $f4 \r\n"
        "paddh $f22, $f10, $f20 \r\n"
        "psubh $f4, $f20, $f10 \r\n"
        "paddh $f20, $f8, $f0 \r\n"
        "psubh $f0, $f0, $f8 \r\n"
        "punpckhhw $f2, $f22, $f20 \r\n"
        "punpcklhw $f10, $f22, $f20 \r\n"
        "punpckhhw $f8, $f0, $f4 \r\n"
        "punpcklhw $f0, $f0, $f4 \r\n"
        "punpckhwd $f4, $f10, $f0 \r\n"
        "punpcklwd $f10, $f10, $f0 \r\n"
        "punpcklwd $f20, $f2, $f8 \r\n"
        "punpckhwd $f0, $f2, $f8 \r\n"
        "paddh $f10, $f10, %[ff_pw_32] \r\n"
        "psrah $f8, $f4, $f16 \r\n"
        "psrah $f6, $f0, $f16 \r\n"
        "psubh $f8, $f8, $f0 \r\n"
        "paddh $f6, $f6, $f4 \r\n"
        "paddh $f2, $f20, $f10 \r\n"
        "psubh $f10, $f10, $f20 \r\n"
        "paddh $f20, $f6, $f2 \r\n"
        "psubh $f2, $f2, $f6 \r\n"
        "paddh $f22, $f8, $f10 \r\n"
        "xor $f14, $f14, $f14 \r\n"
        "psubh $f10, $f10, $f8 \r\n"
        "sdc1 $f14, 0(%[block]) \r\n"
        "sdc1 $f14, 8(%[block]) \r\n"
        "sdc1 $f14, 16(%[block]) \r\n"
        "sdc1 $f14, 24(%[block]) \r\n"
        "lwc1 $f4, 0(%[dst]) \r\n"
        "psrah $f6, $f20, $f18 \r\n"
        "gslwxc1 $f0, 0(%[dst], %[stride]) \r\n"
        "psrah $f8, $f22, $f18 \r\n"
        "punpcklbh $f4, $f4, $f14 \r\n"
        "punpcklbh $f0, $f0, $f14 \r\n"
        "paddh $f4, $f4, $f6 \r\n"
        "paddh $f0, $f0, $f8 \r\n"
        "packushb $f4, $f4, $f14 \r\n"
        "packushb $f0, $f0, $f14 \r\n"
        "swc1 $f4, 0(%[dst]) \r\n"
        "gsswxc1 $f0, 0(%[dst], %[stride]) \r\n"
        "daddu %[dst], %[dst], %[stride] \r\n"
        "daddu %[dst], %[dst], %[stride] \r\n"
        "lwc1 $f4, 0(%[dst]) \r\n"
        "psrah $f10, $f10, $f18 \r\n"
        "gslwxc1 $f0, 0(%[dst], %[stride]) \r\n"
        "psrah $f2, $f2, $f18 \r\n"
        "punpcklbh $f4, $f4, $f14 \r\n"
        "punpcklbh $f0, $f0, $f14 \r\n"
        "paddh $f4, $f4, $f10 \r\n"
        "paddh $f0, $f0, $f2 \r\n"
        "packushb $f4, $f4, $f14 \r\n"
        "swc1 $f4, 0(%[dst]) \r\n"
        "packushb $f0, $f0, $f14 \r\n"
        "gsswxc1 $f0, 0(%[dst], %[stride]) \r\n"
        ::[dst]"r"(dst),[block]"r"(block),[stride]"r"((uint64_t)stride),
          [ff_pw_32]"f"(ff_pw_32)
        : "$8","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16",
          "$f18","$f20","$f22"
    );

    memset(block, 0, 32);
}

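/* 8x8 inverse transform and add. The DC coefficient gets the +32 rounding
 * bias up front; 0x20 bytes below $sp ($29) are used as transpose scratch. */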
void ff_h264_idct8_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
{
    __asm__ volatile (
        "lhu $10, 0x0(%[block]) \r\n"
        "daddiu $29, $29, -0x20 \r\n"
        "daddiu $10, $10, 0x20 \r\n"
        "ldc1 $f2, 0x10(%[block]) \r\n"
        "sh $10, 0x0(%[block]) \r\n"
        "ldc1 $f4, 0x20(%[block]) \r\n"
        "dli $10, 0x1 \r\n"
        "ldc1 $f6, 0x30(%[block]) \r\n"
        "dmtc1 $10, $f16 \r\n"
        "ldc1 $f10, 0x50(%[block]) \r\n"
        "ldc1 $f12, 0x60(%[block]) \r\n"
        "ldc1 $f14, 0x70(%[block]) \r\n"
        "mov.d $f0, $f2 \r\n"
        "psrah $f2, $f2, $f16 \r\n"
        "psrah $f8, $f10, $f16 \r\n"
        "paddh $f2, $f2, $f0 \r\n"
        "paddh $f8, $f8, $f10 \r\n"
        "paddh $f2, $f2, $f10 \r\n"
        "paddh $f8, $f8, $f14 \r\n"
        "paddh $f2, $f2, $f6 \r\n"
        "psubh $f8, $f8, $f0 \r\n"
        "psubh $f0, $f0, $f6 \r\n"
        "psubh $f10, $f10, $f6 \r\n"
        "psrah $f6, $f6, $f16 \r\n"
        "paddh $f0, $f0, $f14 \r\n"
        "psubh $f10, $f10, $f14 \r\n"
        "psrah $f14, $f14, $f16 \r\n"
        "psubh $f0, $f0, $f6 \r\n"
        "dli $10, 0x2 \r\n"
        "psubh $f10, $f10, $f14 \r\n"
        "dmtc1 $10, $f18 \r\n"
        "mov.d $f14, $f2 \r\n"
        "psrah $f2, $f2, $f18 \r\n"
        "psrah $f6, $f8, $f18 \r\n"
        "paddh $f6, $f6, $f0 \r\n"
        "psrah $f0, $f0, $f18 \r\n"
        "paddh $f2, $f2, $f10 \r\n"
        "psrah $f10, $f10, $f18 \r\n"
        "psubh $f0, $f0, $f8 \r\n"
        "psubh $f14, $f14, $f10 \r\n"
        "mov.d $f10, $f12 \r\n"
        "psrah $f12, $f12, $f16 \r\n"
        "psrah $f8, $f4, $f16 \r\n"
        "paddh $f12, $f12, $f4 \r\n"
        "psubh $f8, $f8, $f10 \r\n"
        "ldc1 $f4, 0x0(%[block]) \r\n"
        "ldc1 $f10, 0x40(%[block]) \r\n"
        "paddh $f10, $f10, $f4 \r\n"
        "paddh $f4, $f4, $f4 \r\n"
        "paddh $f12, $f12, $f10 \r\n"
        "psubh $f4, $f4, $f10 \r\n"
        "paddh $f10, $f10, $f10 \r\n"
        "paddh $f8, $f8, $f4 \r\n"
        "psubh $f10, $f10, $f12 \r\n"
        "paddh $f4, $f4, $f4 \r\n"
        "paddh $f14, $f14, $f12 \r\n"
        "psubh $f4, $f4, $f8 \r\n"
        "paddh $f12, $f12, $f12 \r\n"
        "paddh $f0, $f0, $f8 \r\n"
        "psubh $f12, $f12, $f14 \r\n"
        "paddh $f8, $f8, $f8 \r\n"
        "paddh $f6, $f6, $f4 \r\n"
        "psubh $f8, $f8, $f0 \r\n"
        "paddh $f4, $f4, $f4 \r\n"
        "paddh $f2, $f2, $f10 \r\n"
        "psubh $f4, $f4, $f6 \r\n"
        "paddh $f10, $f10, $f10 \r\n"
        "sdc1 $f12, 0x0(%[block]) \r\n"
        "psubh $f10, $f10, $f2 \r\n"
        "punpckhhw $f12, $f14, $f0 \r\n"
        "punpcklhw $f14, $f14, $f0 \r\n"
        "punpckhhw $f0, $f6, $f2 \r\n"
        "punpcklhw $f6, $f6, $f2 \r\n"
        "punpckhwd $f2, $f14, $f6 \r\n"
        "punpcklwd $f14, $f14, $f6 \r\n"
        "punpckhwd $f6, $f12, $f0 \r\n"
        "punpcklwd $f12, $f12, $f0 \r\n"
        "ldc1 $f0, 0x0(%[block]) \r\n"
        "sdc1 $f14, 0x0($29) \r\n"
        "sdc1 $f2, 0x10($29) \r\n"
        "dmfc1 $8, $f12 \r\n"
        "dmfc1 $11, $f6 \r\n"
        "punpckhhw $f6, $f10, $f4 \r\n"
        "punpcklhw $f10, $f10, $f4 \r\n"
        "punpckhhw $f4, $f8, $f0 \r\n"
        "punpcklhw $f8, $f8, $f0 \r\n"
        "punpckhwd $f0, $f10, $f8 \r\n"
        "punpcklwd $f10, $f10, $f8 \r\n"
        "punpckhwd $f8, $f6, $f4 \r\n"
        "punpcklwd $f6, $f6, $f4 \r\n"
        "sdc1 $f10, 0x8($29) \r\n"
        "sdc1 $f0, 0x18($29) \r\n"
        "dmfc1 $9, $f6 \r\n"
        "dmfc1 $12, $f8 \r\n"
        "ldc1 $f2, 0x18(%[block]) \r\n"
        "ldc1 $f12, 0x28(%[block]) \r\n"
        "ldc1 $f4, 0x38(%[block]) \r\n"
        "ldc1 $f0, 0x58(%[block]) \r\n"
        "ldc1 $f6, 0x68(%[block]) \r\n"
        "ldc1 $f8, 0x78(%[block]) \r\n"
        "mov.d $f14, $f2 \r\n"
        "psrah $f10, $f0, $f16 \r\n"
        "psrah $f2, $f2, $f16 \r\n"
        "paddh $f10, $f10, $f0 \r\n"
        "paddh $f2, $f2, $f14 \r\n"
        "paddh $f10, $f10, $f8 \r\n"
        "paddh $f2, $f2, $f0 \r\n"
        "psubh $f10, $f10, $f14 \r\n"
        "paddh $f2, $f2, $f4 \r\n"
        "psubh $f14, $f14, $f4 \r\n"
        "psubh $f0, $f0, $f4 \r\n"
        "psrah $f4, $f4, $f16 \r\n"
        "paddh $f14, $f14, $f8 \r\n"
        "psubh $f0, $f0, $f8 \r\n"
        "psrah $f8, $f8, $f16 \r\n"
        "psubh $f14, $f14, $f4 \r\n"
        "psubh $f0, $f0, $f8 \r\n"
        "mov.d $f8, $f2 \r\n"
        "psrah $f4, $f10, $f18 \r\n"
        "psrah $f2, $f2, $f18 \r\n"
        "paddh $f4, $f4, $f14 \r\n"
        "psrah $f14, $f14, $f18 \r\n"
        "paddh $f2, $f2, $f0 \r\n"
        "psrah $f0, $f0, $f18 \r\n"
        "psubh $f14, $f14, $f10 \r\n"
        "psubh $f8, $f8, $f0 \r\n"
        "mov.d $f0, $f6 \r\n"
        "psrah $f6, $f6, $f16 \r\n"
        "psrah $f10, $f12, $f16 \r\n"
        "paddh $f6, $f6, $f12 \r\n"
        "psubh $f10, $f10, $f0 \r\n"
        "ldc1 $f12, 0x8(%[block]) \r\n"
        "ldc1 $f0, 0x48(%[block]) \r\n"
        "paddh $f0, $f0, $f12 \r\n"
        "paddh $f12, $f12, $f12 \r\n"
        "paddh $f6, $f6, $f0 \r\n"
        "psubh $f12, $f12, $f0 \r\n"
        "paddh $f0, $f0, $f0 \r\n"
        "paddh $f10, $f10, $f12 \r\n"
        "psubh $f0, $f0, $f6 \r\n"
        "paddh $f12, $f12, $f12 \r\n"
        "paddh $f8, $f8, $f6 \r\n"
        "psubh $f12, $f12, $f10 \r\n"
        "paddh $f6, $f6, $f6 \r\n"
        "paddh $f14, $f14, $f10 \r\n"
        "psubh $f6, $f6, $f8 \r\n"
        "paddh $f10, $f10, $f10 \r\n"
        "paddh $f4, $f4, $f12 \r\n"
        "psubh $f10, $f10, $f14 \r\n"
        "paddh $f12, $f12, $f12 \r\n"
        "paddh $f2, $f2, $f0 \r\n"
        "psubh $f12, $f12, $f4 \r\n"
        "paddh $f0, $f0, $f0 \r\n"
        "sdc1 $f6, 0x8(%[block]) \r\n"
        "psubh $f0, $f0, $f2 \r\n"
        "punpckhhw $f6, $f8, $f14 \r\n"
        "punpcklhw $f8, $f8, $f14 \r\n"
        "punpckhhw $f14, $f4, $f2 \r\n"
        "punpcklhw $f4, $f4, $f2 \r\n"
        "punpckhwd $f2, $f8, $f4 \r\n"
        "punpcklwd $f8, $f8, $f4 \r\n"
        "punpckhwd $f4, $f6, $f14 \r\n"
        "punpcklwd $f6, $f6, $f14 \r\n"
        "ldc1 $f14, 0x8(%[block]) \r\n"
        "dmfc1 $13, $f8 \r\n"
        "dmfc1 $15, $f2 \r\n"
        "mov.d $f24, $f6 \r\n"
        "mov.d $f28, $f4 \r\n"
        "punpckhhw $f4, $f0, $f12 \r\n"
        "punpcklhw $f0, $f0, $f12 \r\n"
        "punpckhhw $f12, $f10, $f14 \r\n"
        "punpcklhw $f10, $f10, $f14 \r\n"
        "punpckhwd $f14, $f0, $f10 \r\n"
        "punpcklwd $f0, $f0, $f10 \r\n"
        "punpckhwd $f10, $f4, $f12 \r\n"
        "punpcklwd $f4, $f4, $f12 \r\n"
        "dmfc1 $14, $f0 \r\n"
        "mov.d $f22, $f14 \r\n"
        "mov.d $f26, $f4 \r\n"
        "mov.d $f30, $f10 \r\n"
        "daddiu $10, %[dst], 0x4 \r\n"
        "dmtc1 $15, $f14 \r\n"
        "dmtc1 $11, $f12 \r\n"
        "ldc1 $f2, 0x10($29) \r\n"
        "dmtc1 $8, $f6 \r\n"
        "mov.d $f8, $f2 \r\n"
        "psrah $f2, $f2, $f16 \r\n"
        "psrah $f0, $f14, $f16 \r\n"
        "paddh $f2, $f2, $f8 \r\n"
        "paddh $f0, $f0, $f14 \r\n"
        "paddh $f2, $f2, $f14 \r\n"
        "paddh $f0, $f0, $f28 \r\n"
        "paddh $f2, $f2, $f12 \r\n"
        "psubh $f0, $f0, $f8 \r\n"
        "psubh $f8, $f8, $f12 \r\n"
        "psubh $f14, $f14, $f12 \r\n"
        "psrah $f12, $f12, $f16 \r\n"
        "paddh $f8, $f8, $f28 \r\n"
        "psubh $f14, $f14, $f28 \r\n"
        "psrah $f10, $f28, $f16 \r\n"
        "psubh $f8, $f8, $f12 \r\n"
        "psubh $f14, $f14, $f10 \r\n"
        "mov.d $f10, $f2 \r\n"
        "psrah $f2, $f2, $f18 \r\n"
        "psrah $f12, $f0, $f18 \r\n"
        "paddh $f2, $f2, $f14 \r\n"
        "paddh $f12, $f12, $f8 \r\n"
        "psrah $f8, $f8, $f18 \r\n"
        "psrah $f14, $f14, $f18 \r\n"
        "psubh $f8, $f8, $f0 \r\n"
        "psubh $f10, $f10, $f14 \r\n"
        "mov.d $f14, $f24 \r\n"
        "psrah $f4, $f24, $f16 \r\n"
        "psrah $f0, $f6, $f16 \r\n"
        "paddh $f4, $f4, $f6 \r\n"
        "psubh $f0, $f0, $f14 \r\n"
        "ldc1 $f6, 0x0($29) \r\n"
        "dmtc1 $13, $f14 \r\n"
        "paddh $f14, $f14, $f6 \r\n"
        "paddh $f6, $f6, $f6 \r\n"
        "paddh $f4, $f4, $f14 \r\n"
        "psubh $f6, $f6, $f14 \r\n"
        "paddh $f14, $f14, $f14 \r\n"
        "paddh $f0, $f0, $f6 \r\n"
        "psubh $f14, $f14, $f4 \r\n"
        "paddh $f6, $f6, $f6 \r\n"
        "paddh $f10, $f10, $f4 \r\n"
        "psubh $f6, $f6, $f0 \r\n"
        "paddh $f4, $f4, $f4 \r\n"
        "paddh $f8, $f8, $f0 \r\n"
        "psubh $f4, $f4, $f10 \r\n"
        "paddh $f0, $f0, $f0 \r\n"
        "paddh $f12, $f12, $f6 \r\n"
        "psubh $f0, $f0, $f8 \r\n"
        "paddh $f6, $f6, $f6 \r\n"
        "paddh $f2, $f2, $f14 \r\n"
        "psubh $f6, $f6, $f12 \r\n"
        "paddh $f14, $f14, $f14 \r\n"
        "sdc1 $f6, 0x0($29) \r\n"
        "psubh $f14, $f14, $f2 \r\n"
        "sdc1 $f0, 0x10($29) \r\n"
        "dmfc1 $8, $f4 \r\n"
        "xor $f4, $f4, $f4 \r\n"
        "sdc1 $f4, 0x0(%[block]) \r\n"
        "sdc1 $f4, 0x8(%[block]) \r\n"
        "sdc1 $f4, 0x10(%[block]) \r\n"
        "sdc1 $f4, 0x18(%[block]) \r\n"
        "sdc1 $f4, 0x20(%[block]) \r\n"
        "sdc1 $f4, 0x28(%[block]) \r\n"
        "sdc1 $f4, 0x30(%[block]) \r\n"
        "sdc1 $f4, 0x38(%[block]) \r\n"
        "sdc1 $f4, 0x40(%[block]) \r\n"
        "sdc1 $f4, 0x48(%[block]) \r\n"
        "sdc1 $f4, 0x50(%[block]) \r\n"
        "sdc1 $f4, 0x58(%[block]) \r\n"
        "sdc1 $f4, 0x60(%[block]) \r\n"
        "sdc1 $f4, 0x68(%[block]) \r\n"
        "sdc1 $f4, 0x70(%[block]) \r\n"
        "sdc1 $f4, 0x78(%[block]) \r\n"
        "dli $11, 0x6 \r\n"
        "lwc1 $f6, 0x0(%[dst]) \r\n"
        "dmtc1 $11, $f20 \r\n"
        "gslwxc1 $f0, 0x0(%[dst], %[stride]) \r\n"
        "psrah $f10, $f10, $f20 \r\n"
        "psrah $f8, $f8, $f20 \r\n"
        "punpcklbh $f6, $f6, $f4 \r\n"
        "punpcklbh $f0, $f0, $f4 \r\n"
        "paddh $f6, $f6, $f10 \r\n"
        "paddh $f0, $f0, $f8 \r\n"
        "packushb $f6, $f6, $f4 \r\n"
        "packushb $f0, $f0, $f4 \r\n"
        "swc1 $f6, 0x0(%[dst]) \r\n"
        "gsswxc1 $f0, 0x0(%[dst], %[stride]) \r\n"
        "daddu %[dst], %[dst], %[stride] \r\n"
        "daddu %[dst], %[dst], %[stride] \r\n"
        "lwc1 $f6, 0x0(%[dst]) \r\n"
        "gslwxc1 $f0, 0x0(%[dst], %[stride]) \r\n"
        "psrah $f12, $f12, $f20 \r\n"
        "psrah $f2, $f2, $f20 \r\n"
        "punpcklbh $f6, $f6, $f4 \r\n"
        "punpcklbh $f0, $f0, $f4 \r\n"
        "paddh $f6, $f6, $f12 \r\n"
        "paddh $f0, $f0, $f2 \r\n"
        "packushb $f6, $f6, $f4 \r\n"
        "packushb $f0, $f0, $f4 \r\n"
        "swc1 $f6, 0x0(%[dst]) \r\n"
        "gsswxc1 $f0, 0x0(%[dst], %[stride]) \r\n"
        "ldc1 $f10, 0x0($29) \r\n"
        "ldc1 $f8, 0x10($29) \r\n"
        "dmtc1 $8, $f12 \r\n"
        "daddu %[dst], %[dst], %[stride] \r\n"
        "daddu %[dst], %[dst], %[stride] \r\n"
        "lwc1 $f6, 0x0(%[dst]) \r\n"
        "gslwxc1 $f0, 0x0(%[dst], %[stride]) \r\n"
        "psrah $f14, $f14, $f20 \r\n"
        "psrah $f10, $f10, $f20 \r\n"
        "punpcklbh $f6, $f6, $f4 \r\n"
        "punpcklbh $f0, $f0, $f4 \r\n"
        "paddh $f6, $f6, $f14 \r\n"
        "paddh $f0, $f0, $f10 \r\n"
        "packushb $f6, $f6, $f4 \r\n"
        "packushb $f0, $f0, $f4 \r\n"
        "swc1 $f6, 0x0(%[dst]) \r\n"
        "gsswxc1 $f0, 0x0(%[dst], %[stride]) \r\n"
        "daddu %[dst], %[dst], %[stride] \r\n"
        "daddu %[dst], %[dst], %[stride] \r\n"
        "lwc1 $f6, 0x0(%[dst]) \r\n"
        "gslwxc1 $f0, 0x0(%[dst], %[stride]) \r\n"
        "psrah $f8, $f8, $f20 \r\n"
        "psrah $f12, $f12, $f20 \r\n"
        "punpcklbh $f6, $f6, $f4 \r\n"
        "punpcklbh $f0, $f0, $f4 \r\n"
        "paddh $f6, $f6, $f8 \r\n"
        "paddh $f0, $f0, $f12 \r\n"
        "packushb $f6, $f6, $f4 \r\n"
        "packushb $f0, $f0, $f4 \r\n"
        "swc1 $f6, 0x0(%[dst]) \r\n"
        "gsswxc1 $f0, 0x0(%[dst], %[stride]) \r\n"
        "dmtc1 $12, $f2 \r\n"
        "dmtc1 $9, $f12 \r\n"
        "ldc1 $f8, 0x18($29) \r\n"
        "mov.d $f10, $f8 \r\n"
        "psrah $f8, $f8, $f16 \r\n"
        "psrah $f14, $f22, $f16 \r\n"
        "paddh $f14, $f14, $f22 \r\n"
        "paddh $f8, $f8, $f10 \r\n"
        "paddh $f14, $f14, $f30 \r\n"
        "paddh $f8, $f8, $f22 \r\n"
        "psubh $f14, $f14, $f10 \r\n"
        "paddh $f8, $f8, $f2 \r\n"
        "psubh $f10, $f10, $f2 \r\n"
        "psubh $f6, $f22, $f2 \r\n"
        "psrah $f2, $f2, $f16 \r\n"
        "paddh $f10, $f10, $f30 \r\n"
        "psubh $f6, $f6, $f30 \r\n"
        "psrah $f4, $f30, $f16 \r\n"
        "psubh $f10, $f10, $f2 \r\n"
        "psubh $f6, $f6, $f4 \r\n"
        "mov.d $f4, $f8 \r\n"
        "psrah $f8, $f8, $f18 \r\n"
        "psrah $f2, $f14, $f18 \r\n"
        "paddh $f8, $f8, $f6 \r\n"
        "paddh $f2, $f2, $f10 \r\n"
        "psrah $f10, $f10, $f18 \r\n"
        "psrah $f6, $f6, $f18 \r\n"
        "psubh $f10, $f10, $f14 \r\n"
        "psubh $f4, $f4, $f6 \r\n"
        "mov.d $f6, $f26 \r\n"
        "psrah $f0, $f26, $f16 \r\n"
        "psrah $f14, $f12, $f16 \r\n"
        "paddh $f0, $f0, $f12 \r\n"
        "psubh $f14, $f14, $f6 \r\n"
        "ldc1 $f12, 0x8($29) \r\n"
        "dmtc1 $14, $f6 \r\n"
        "paddh $f6, $f6, $f12 \r\n"
        "paddh $f12, $f12, $f12 \r\n"
        "paddh $f0, $f0, $f6 \r\n"
        "psubh $f12, $f12, $f6 \r\n"
        "paddh $f6, $f6, $f6 \r\n"
        "paddh $f14, $f14, $f12 \r\n"
        "psubh $f6, $f6, $f0 \r\n"
        "paddh $f12, $f12, $f12 \r\n"
        "paddh $f4, $f4, $f0 \r\n"
        "psubh $f12, $f12, $f14 \r\n"
        "paddh $f0, $f0, $f0 \r\n"
        "paddh $f10, $f10, $f14 \r\n"
        "psubh $f0, $f0, $f4 \r\n"
        "paddh $f14, $f14, $f14 \r\n"
        "paddh $f2, $f2, $f12 \r\n"
        "psubh $f14, $f14, $f10 \r\n"
        "paddh $f12, $f12, $f12 \r\n"
        "paddh $f8, $f8, $f6 \r\n"
        "psubh $f12, $f12, $f2 \r\n"
        "paddh $f6, $f6, $f6 \r\n"
        "sdc1 $f12, 0x8($29) \r\n"
        "psubh $f6, $f6, $f8 \r\n"
        "sdc1 $f14, 0x18($29) \r\n"
        "dmfc1 $9, $f0 \r\n"
        "xor $f0, $f0, $f0 \r\n"
        "lwc1 $f12, 0x0($10) \r\n"
        "gslwxc1 $f14, 0x0($10, %[stride]) \r\n"
        "psrah $f4, $f4, $f20 \r\n"
        "psrah $f10, $f10, $f20 \r\n"
        "punpcklbh $f12, $f12, $f0 \r\n"
        "punpcklbh $f14, $f14, $f0 \r\n"
        "paddh $f12, $f12, $f4 \r\n"
        "paddh $f14, $f14, $f10 \r\n"
        "packushb $f12, $f12, $f0 \r\n"
        "packushb $f14, $f14, $f0 \r\n"
        "swc1 $f12, 0x0($10) \r\n"
        "gsswxc1 $f14, 0x0($10, %[stride]) \r\n"
        "daddu $10, $10, %[stride] \r\n"
        "daddu $10, $10, %[stride] \r\n"
        "lwc1 $f12, 0x0($10) \r\n"
        "gslwxc1 $f14, 0x0($10, %[stride]) \r\n"
        "psrah $f2, $f2, $f20 \r\n"
        "psrah $f8, $f8, $f20 \r\n"
        "punpcklbh $f12, $f12, $f0 \r\n"
        "punpcklbh $f14, $f14, $f0 \r\n"
        "paddh $f12, $f12, $f2 \r\n"
        "paddh $f14, $f14, $f8 \r\n"
        "packushb $f12, $f12, $f0 \r\n"
        "packushb $f14, $f14, $f0 \r\n"
        "swc1 $f12, 0x0($10) \r\n"
        "gsswxc1 $f14, 0x0($10, %[stride]) \r\n"
        "ldc1 $f4, 0x8($29) \r\n"
        "ldc1 $f10, 0x18($29) \r\n"
        "daddu $10, $10, %[stride] \r\n"
        "dmtc1 $9, $f2 \r\n"
        "daddu $10, $10, %[stride] \r\n"
        "lwc1 $f12, 0x0($10) \r\n"
        "gslwxc1 $f14, 0x0($10, %[stride]) \r\n"
        "psrah $f6, $f6, $f20 \r\n"
        "psrah $f4, $f4, $f20 \r\n"
        "punpcklbh $f12, $f12, $f0 \r\n"
        "punpcklbh $f14, $f14, $f0 \r\n"
        "paddh $f12, $f12, $f6 \r\n"
        "paddh $f14, $f14, $f4 \r\n"
        "packushb $f12, $f12, $f0 \r\n"
        "packushb $f14, $f14, $f0 \r\n"
        "swc1 $f12, 0x0($10) \r\n"
        "gsswxc1 $f14, 0x0($10, %[stride]) \r\n"
        "daddu $10, $10, %[stride] \r\n"
        "daddu $10, $10, %[stride] \r\n"
        "lwc1 $f12, 0x0($10) \r\n"
        "gslwxc1 $f14, 0x0($10, %[stride]) \r\n"
        "psrah $f10, $f10, $f20 \r\n"
        "psrah $f2, $f2, $f20 \r\n"
        "punpcklbh $f12, $f12, $f0 \r\n"
        "punpcklbh $f14, $f14, $f0 \r\n"
        "paddh $f12, $f12, $f10 \r\n"
        "paddh $f14, $f14, $f2 \r\n"
        "packushb $f12, $f12, $f0 \r\n"
        "packushb $f14, $f14, $f0 \r\n"
        "swc1 $f12, 0x0($10) \r\n"
        "gsswxc1 $f14, 0x0($10, %[stride]) \r\n"
        "daddiu $29, $29, 0x20 \r\n"
        ::[dst]"r"(dst),[block]"r"(block),[stride]"r"((uint64_t)stride)
586  :"$8","$9","$10","$11","$12","$13","$14","$15","$29","$f0","$f2","$f4",
587  "$f8","$f10","$f12","$f14","$f16","$f18","$f20","$f22","$f24","$f26",
588  "$f28","$f30"
    );

    memset(block, 0, 128);
}

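/* DC-only 4x4 case: splat (dc + 32) >> 6 and add it to four rows of dst
 * with unsigned saturation. */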
void ff_h264_idct_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
{
    __asm__ volatile (
        "lh $8, 0x0(%[block]) \r\n"
        "sd $0, 0x0(%[block]) \r\n"
        "daddiu $8, $8, 0x20 \r\n"
        "daddu $10, %[stride], %[stride] \r\n"
        "dsra $8, $8, 0x6 \r\n"
        "xor $f2, $f2, $f2 \r\n"
        "mtc1 $8, $f0 \r\n"
        "pshufh $f0, $f0, $f2 \r\n"
        "daddu $8, $10, %[stride] \r\n"
        "psubh $f2, $f2, $f0 \r\n"
        "packushb $f0, $f0, $f0 \r\n"
        "packushb $f2, $f2, $f2 \r\n"
        "lwc1 $f4, 0x0(%[dst]) \r\n"
        "gslwxc1 $f6, 0x0(%[dst], %[stride]) \r\n"
        "gslwxc1 $f8, 0x0(%[dst], $10) \r\n"
        "gslwxc1 $f10, 0x0(%[dst], $8) \r\n"
        "paddusb $f4, $f4, $f0 \r\n"
        "paddusb $f6, $f6, $f0 \r\n"
        "paddusb $f8, $f8, $f0 \r\n"
        "paddusb $f10, $f10, $f0 \r\n"
        "psubusb $f4, $f4, $f2 \r\n"
        "psubusb $f6, $f6, $f2 \r\n"
        "psubusb $f8, $f8, $f2 \r\n"
        "psubusb $f10, $f10, $f2 \r\n"
        "swc1 $f4, 0x0(%[dst]) \r\n"
        "gsswxc1 $f6, 0x0(%[dst], %[stride]) \r\n"
        "gsswxc1 $f8, 0x0(%[dst], $10) \r\n"
        "gsswxc1 $f10, 0x0(%[dst], $8) \r\n"
        ::[dst]"r"(dst),[block]"r"(block),[stride]"r"((uint64_t)stride)
        : "$8","$10","$f0","$f2","$f4","$f6","$f8","$f10"
    );
}

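/* DC-only 8x8 case: the same splat-and-saturate as above, over eight
 * 8-pixel rows in two groups of four. */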
void ff_h264_idct8_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
{
    __asm__ volatile (
        "lh $8, 0x0(%[block]) \r\n"
        "sd $0, 0x0(%[block]) \r\n"
        "daddiu $8, $8, 0x20 \r\n"
        "daddu $10, %[stride], %[stride] \r\n"
        "dsra $8, $8, 0x6 \r\n"
        "xor $f2, $f2, $f2 \r\n"
        "mtc1 $8, $f0 \r\n"
        "pshufh $f0, $f0, $f2 \r\n"
        "daddu $8, $10, %[stride] \r\n"
        "psubh $f2, $f2, $f0 \r\n"
        "packushb $f0, $f0, $f0 \r\n"
        "packushb $f2, $f2, $f2 \r\n"
        "ldc1 $f4, 0x0(%[dst]) \r\n"
        "gsldxc1 $f6, 0x0(%[dst], %[stride]) \r\n"
        "gsldxc1 $f8, 0x0(%[dst], $10) \r\n"
        "gsldxc1 $f10, 0x0(%[dst], $8) \r\n"
        "paddusb $f4, $f4, $f0 \r\n"
        "paddusb $f6, $f6, $f0 \r\n"
        "paddusb $f8, $f8, $f0 \r\n"
        "paddusb $f10, $f10, $f0 \r\n"
        "psubusb $f4, $f4, $f2 \r\n"
        "psubusb $f6, $f6, $f2 \r\n"
        "psubusb $f8, $f8, $f2 \r\n"
        "psubusb $f10, $f10, $f2 \r\n"
        "sdc1 $f4, 0x0(%[dst]) \r\n"
        "gssdxc1 $f6, 0x0(%[dst], %[stride]) \r\n"
        "gssdxc1 $f8, 0x0(%[dst], $10) \r\n"
        "daddu $9, $10, $10 \r\n"
        "gssdxc1 $f10, 0x0(%[dst], $8) \r\n"
        "daddu %[dst], %[dst], $9 \r\n"
        "ldc1 $f4, 0x0(%[dst]) \r\n"
        "gsldxc1 $f6, 0x0(%[dst], %[stride]) \r\n"
        "gsldxc1 $f8, 0x0(%[dst], $10) \r\n"
        "gsldxc1 $f10, 0x0(%[dst], $8) \r\n"
        "paddusb $f4, $f4, $f0 \r\n"
        "paddusb $f6, $f6, $f0 \r\n"
        "paddusb $f8, $f8, $f0 \r\n"
        "paddusb $f10, $f10, $f0 \r\n"
        "psubusb $f4, $f4, $f2 \r\n"
        "psubusb $f6, $f6, $f2 \r\n"
        "psubusb $f8, $f8, $f2 \r\n"
        "psubusb $f10, $f10, $f2 \r\n"
        "sdc1 $f4, 0x0(%[dst]) \r\n"
        "gssdxc1 $f6, 0x0(%[dst], %[stride]) \r\n"
        "gssdxc1 $f8, 0x0(%[dst], $10) \r\n"
        "gssdxc1 $f10, 0x0(%[dst], $8) \r\n"
        ::[dst]"r"(dst),[block]"r"(block),[stride]"r"((uint64_t)stride)
        : "$8","$9","$10","$f0","$f2","$f4","$f6","$f8","$f10"
    );
}

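/* Dispatch the 4x4 (DC-only or full) IDCT over the 16 luma blocks of a
 * macroblock, guided by the non-zero-count table. */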
void ff_h264_idct_add16_8_mmi(uint8_t *dst, const int *block_offset,
        int16_t *block, int stride, const uint8_t nnzc[15*8])
{
    int i;
    for(i=0; i<16; i++){
        int nnz = nnzc[ scan8[i] ];
        if(nnz){
            if(nnz==1 && ((int16_t*)block)[i*16])
                ff_h264_idct_dc_add_8_mmi(dst + block_offset[i], block + i*16,
                        stride);
            else
                ff_h264_idct_add_8_mmi(dst + block_offset[i], block + i*16,
                        stride);
        }
    }
}

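/* Intra variant: a block with no coded coefficients may still carry a DC
 * value, so fall back to the DC-only add in that case. */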
void ff_h264_idct_add16intra_8_mmi(uint8_t *dst, const int *block_offset,
        int16_t *block, int stride, const uint8_t nnzc[15*8])
{
    int i;
    for(i=0; i<16; i++){
        if(nnzc[ scan8[i] ])
            ff_h264_idct_add_8_mmi(dst + block_offset[i], block + i*16, stride);
        else if(((int16_t*)block)[i*16])
            ff_h264_idct_dc_add_8_mmi(dst + block_offset[i], block + i*16,
                    stride);
    }
}

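/* 8x8 transform dispatch: one 8x8 block per group of four 4x4 positions. */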
void ff_h264_idct8_add4_8_mmi(uint8_t *dst, const int *block_offset,
        int16_t *block, int stride, const uint8_t nnzc[15*8])
{
    int i;
    for(i=0; i<16; i+=4){
        int nnz = nnzc[ scan8[i] ];
        if(nnz){
            if(nnz==1 && ((int16_t*)block)[i*16])
                ff_h264_idct8_dc_add_8_mmi(dst + block_offset[i],
                        block + i*16, stride);
            else
                ff_h264_idct8_add_8_mmi(dst + block_offset[i], block + i*16,
                        stride);
        }
    }
}

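/* Chroma (4:2:0) dispatch over both chroma planes. */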
void ff_h264_idct_add8_8_mmi(uint8_t **dest, const int *block_offset,
        int16_t *block, int stride, const uint8_t nnzc[15*8])
{
    int i, j;
    for(j=1; j<3; j++){
        for(i=j*16; i<j*16+4; i++){
            if(nnzc[ scan8[i] ])
                ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i],
                        block + i*16, stride);
            else if(((int16_t*)block)[i*16])
                ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i],
                        block + i*16, stride);
        }
    }
}

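/* Chroma 4:2:2 dispatch: each plane carries eight 4x4 blocks, the second
 * four addressed through block_offset[i+4]. */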
void ff_h264_idct_add8_422_8_mmi(uint8_t **dest, const int *block_offset,
        int16_t *block, int stride, const uint8_t nnzc[15*8])
{
    int i, j;

    for(j=1; j<3; j++){
        for(i=j*16; i<j*16+4; i++){
            if(nnzc[ scan8[i] ])
                ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i],
                        block + i*16, stride);
            else if(((int16_t*)block)[i*16])
                ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i],
                        block + i*16, stride);
        }
    }

    for(j=1; j<3; j++){
        for(i=j*16+4; i<j*16+8; i++){
            if(nnzc[ scan8[i+4] ])
                ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i+4],
                        block + i*16, stride);
            else if(((int16_t*)block)[i*16])
                ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i+4],
                        block + i*16, stride);
        }
    }
}

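/* Dequantize and inverse-transform the 4x4 luma DC block, scattering the
 * results to the DC slot of each 4x4 block in output. The path at label 1
 * handles qmul values too large for the 16-bit multiply-add, using a
 * reduced multiplier and an adjusted shift. */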
void ff_h264_luma_dc_dequant_idct_8_mmi(int16_t *output, int16_t *input,
        int qmul)
{
    __asm__ volatile (
        ".set noreorder \r\n"
        "dli $10, 0x8 \r\n"
        "ldc1 $f6, 0x18(%[input]) \r\n"
        "dmtc1 $10, $f16 \r\n"
        "ldc1 $f4, 0x10(%[input]) \r\n"
        "dli $10, 0x20 \r\n"
        "ldc1 $f2, 0x8(%[input]) \r\n"
        "dmtc1 $10, $f18 \r\n"
        "ldc1 $f0, 0x0(%[input]) \r\n"
        "mov.d $f8, $f6 \r\n"
        "paddh $f6, $f6, $f4 \r\n"
        "psubh $f4, $f4, $f8 \r\n"
        "mov.d $f8, $f2 \r\n"
        "paddh $f2, $f2, $f0 \r\n"
        "psubh $f0, $f0, $f8 \r\n"
        "mov.d $f8, $f6 \r\n"
        "paddh $f6, $f6, $f2 \r\n"
        "psubh $f2, $f2, $f8 \r\n"
        "mov.d $f8, $f4 \r\n"
        "paddh $f4, $f4, $f0 \r\n"
        "psubh $f0, $f0, $f8 \r\n"
        "mov.d $f8, $f6 \r\n"
        "punpcklhw $f6, $f6, $f2 \r\n"
        "punpckhhw $f8, $f8, $f2 \r\n"
        "punpckhhw $f2, $f0, $f4 \r\n"
        "punpcklhw $f0, $f0, $f4 \r\n"
        "punpckhwd $f4, $f6, $f0 \r\n"
        "punpcklwd $f6, $f6, $f0 \r\n"
        "mov.d $f0, $f8 \r\n"
        "punpcklwd $f8, $f8, $f2 \r\n"
        "punpckhwd $f0, $f0, $f2 \r\n"
        "mov.d $f2, $f0 \r\n"
        "paddh $f0, $f0, $f8 \r\n"
        "psubh $f8, $f8, $f2 \r\n"
        "mov.d $f2, $f4 \r\n"
        "paddh $f4, $f4, $f6 \r\n"
        "psubh $f6, $f6, $f2 \r\n"
        "mov.d $f2, $f0 \r\n"
        "paddh $f0, $f0, $f4 \r\n"
        "psubh $f4, $f4, $f2 \r\n"
        "mov.d $f2, $f8 \r\n"
        "daddiu $10, %[qmul], -0x7fff \r\n"
        "paddh $f8, $f8, $f6 \r\n"
        "bgtz $10, 1f \r\n"
        "psubh $f6, $f6, $f2 \r\n"
        "ori $10, $0, 0x80 \r\n"
        "dsll $10, $10, 0x10 \r\n"
        "punpckhhw $f2, $f0, %[ff_pw_1] \r\n"
        "daddu %[qmul], %[qmul], $10 \r\n"
        "punpcklhw $f0, $f0, %[ff_pw_1] \r\n"
        "punpckhhw $f10, $f4, %[ff_pw_1] \r\n"
        "punpcklhw $f4, $f4, %[ff_pw_1] \r\n"
        "mtc1 %[qmul], $f14 \r\n"
        "punpcklwd $f14, $f14, $f14 \r\n"
        "pmaddhw $f0, $f0, $f14 \r\n"
        "pmaddhw $f4, $f4, $f14 \r\n"
        "pmaddhw $f2, $f2, $f14 \r\n"
        "pmaddhw $f10, $f10, $f14 \r\n"
        "psraw $f0, $f0, $f16 \r\n"
        "psraw $f4, $f4, $f16 \r\n"
        "psraw $f2, $f2, $f16 \r\n"
        "psraw $f10, $f10, $f16 \r\n"
        "packsswh $f0, $f0, $f2 \r\n"
        "packsswh $f4, $f4, $f10 \r\n"
        "mfc1 $9, $f0 \r\n"
        "dsrl $f0, $f0, $f18 \r\n"
        "mfc1 %[input], $f0 \r\n"
        "sh $9, 0x0(%[output]) \r\n"
        "sh %[input], 0x80(%[output]) \r\n"
        "dsrl $9, $9, 0x10 \r\n"
        "dsrl %[input], %[input], 0x10 \r\n"
        "sh $9, 0x20(%[output]) \r\n"
        "sh %[input], 0xa0(%[output]) \r\n"
        "mfc1 $9, $f4 \r\n"
        "dsrl $f4, $f4, $f18 \r\n"
        "mfc1 %[input], $f4 \r\n"
        "sh $9, 0x40(%[output]) \r\n"
        "sh %[input], 0xc0(%[output]) \r\n"
        "dsrl $9, $9, 0x10 \r\n"
        "dsrl %[input], %[input], 0x10 \r\n"
        "sh $9, 0x60(%[output]) \r\n"
        "sh %[input], 0xe0(%[output]) \r\n"
        "punpckhhw $f2, $f6, %[ff_pw_1] \r\n"
        "punpcklhw $f6, $f6, %[ff_pw_1] \r\n"
        "punpckhhw $f10, $f8, %[ff_pw_1] \r\n"
        "punpcklhw $f8, $f8, %[ff_pw_1] \r\n"
        "mtc1 %[qmul], $f14 \r\n"
        "punpcklwd $f14, $f14, $f14 \r\n"
        "pmaddhw $f6, $f6, $f14 \r\n"
        "pmaddhw $f8, $f8, $f14 \r\n"
        "pmaddhw $f2, $f2, $f14 \r\n"
        "pmaddhw $f10, $f10, $f14 \r\n"
        "psraw $f6, $f6, $f16 \r\n"
        "psraw $f8, $f8, $f16 \r\n"
        "psraw $f2, $f2, $f16 \r\n"
        "psraw $f10, $f10, $f16 \r\n"
        "packsswh $f6, $f6, $f2 \r\n"
        "packsswh $f8, $f8, $f10 \r\n"
        "mfc1 $9, $f6 \r\n"
        "dsrl $f6, $f6, $f18 \r\n"
        "mfc1 %[input], $f6 \r\n"
        "sh $9, 0x100(%[output]) \r\n"
        "sh %[input], 0x180(%[output]) \r\n"
        "dsrl $9, $9, 0x10 \r\n"
        "dsrl %[input], %[input], 0x10 \r\n"
        "sh $9, 0x120(%[output]) \r\n"
        "sh %[input], 0x1a0(%[output]) \r\n"
        "mfc1 $9, $f8 \r\n"
        "dsrl $f8, $f8, $f18 \r\n"
        "mfc1 %[input], $f8 \r\n"
        "sh $9, 0x140(%[output]) \r\n"
        "sh %[input], 0x1c0(%[output]) \r\n"
        "dsrl $9, $9, 0x10 \r\n"
        "dsrl %[input], %[input], 0x10 \r\n"
        "sh $9, 0x160(%[output]) \r\n"
        "jr $31 \r\n"
        "sh %[input], 0x1e0(%[output]) \r\n"
        "1: \r\n"
        "ori $10, $0, 0x1f \r\n"
        "clz $9, %[qmul] \r\n"
        "ori %[input], $0, 0x7 \r\n"
        "dsubu $9, $10, $9 \r\n"
        "ori $10, $0, 0x80 \r\n"
        "dsll $10, $10, 0x10 \r\n"
        "daddu %[qmul], %[qmul], $10 \r\n"
        "dsubu $10, $9, %[input] \r\n"
        "movn $9, %[input], $10 \r\n"
        "daddiu %[input], %[input], 0x1 \r\n"
        "andi $10, $9, 0xff \r\n"
        "dsrlv %[qmul], %[qmul], $10 \r\n"
        "dsubu %[input], %[input], $9 \r\n"
        "mtc1 %[input], $f12 \r\n"
        "punpckhhw $f2, $f0, %[ff_pw_1] \r\n"
        "punpcklhw $f0, $f0, %[ff_pw_1] \r\n"
        "punpckhhw $f10, $f4, %[ff_pw_1] \r\n"
        "punpcklhw $f4, $f4, %[ff_pw_1] \r\n"
        "mtc1 %[qmul], $f14 \r\n"
        "punpcklwd $f14, $f14, $f14 \r\n"
        "pmaddhw $f0, $f0, $f14 \r\n"
        "pmaddhw $f4, $f4, $f14 \r\n"
        "pmaddhw $f2, $f2, $f14 \r\n"
        "pmaddhw $f10, $f10, $f14 \r\n"
        "psraw $f0, $f0, $f12 \r\n"
        "psraw $f4, $f4, $f12 \r\n"
        "psraw $f2, $f2, $f12 \r\n"
        "psraw $f10, $f10, $f12 \r\n"
        "packsswh $f0, $f0, $f2 \r\n"
        "packsswh $f4, $f4, $f10 \r\n"
        "mfc1 $9, $f0 \r\n"
        "dsrl $f0, $f0, $f18 \r\n"
        "sh $9, 0x0(%[output]) \r\n"
        "mfc1 %[input], $f0 \r\n"
        "dsrl $9, $9, 0x10 \r\n"
        "sh %[input], 0x80(%[output]) \r\n"
        "sh $9, 0x20(%[output]) \r\n"
        "dsrl %[input], %[input], 0x10 \r\n"
        "mfc1 $9, $f4 \r\n"
        "sh %[input], 0xa0(%[output]) \r\n"
        "dsrl $f4, $f4, $f18 \r\n"
        "sh $9, 0x40(%[output]) \r\n"
        "mfc1 %[input], $f4 \r\n"
        "dsrl $9, $9, 0x10 \r\n"
        "sh %[input], 0xc0(%[output]) \r\n"
        "sh $9, 0x60(%[output]) \r\n"
        "dsrl %[input], %[input], 0x10 \r\n"
        "sh %[input], 0xe0(%[output]) \r\n"
        "punpckhhw $f2, $f6, %[ff_pw_1] \r\n"
        "punpcklhw $f6, $f6, %[ff_pw_1] \r\n"
        "punpckhhw $f10, $f8, %[ff_pw_1] \r\n"
        "punpcklhw $f8, $f8, %[ff_pw_1] \r\n"
        "mtc1 %[qmul], $f14 \r\n"
        "punpcklwd $f14, $f14, $f14 \r\n"
        "pmaddhw $f6, $f6, $f14 \r\n"
        "pmaddhw $f8, $f8, $f14 \r\n"
        "pmaddhw $f2, $f2, $f14 \r\n"
        "pmaddhw $f10, $f10, $f14 \r\n"
        "psraw $f6, $f6, $f12 \r\n"
        "psraw $f8, $f8, $f12 \r\n"
        "psraw $f2, $f2, $f12 \r\n"
        "psraw $f10, $f10, $f12 \r\n"
        "packsswh $f6, $f6, $f2 \r\n"
        "packsswh $f8, $f8, $f10 \r\n"
        "mfc1 $9, $f6 \r\n"
        "dsrl $f6, $f6, $f18 \r\n"
        "mfc1 %[input], $f6 \r\n"
        "sh $9, 0x100(%[output]) \r\n"
        "sh %[input], 0x180(%[output]) \r\n"
        "dsrl $9, $9, 0x10 \r\n"
        "dsrl %[input], %[input], 0x10 \r\n"
        "sh $9, 0x120(%[output]) \r\n"
        "sh %[input], 0x1a0(%[output]) \r\n"
        "mfc1 $9, $f8 \r\n"
        "dsrl $f8, $f8, $f18 \r\n"
        "mfc1 %[input], $f8 \r\n"
        "sh $9, 0x140(%[output]) \r\n"
        "sh %[input], 0x1c0(%[output]) \r\n"
        "dsrl $9, $9, 0x10 \r\n"
        "dsrl %[input], %[input], 0x10 \r\n"
        "sh $9, 0x160(%[output]) \r\n"
        "sh %[input], 0x1e0(%[output]) \r\n"
        ".set reorder \r\n"
        ::[output]"r"(output),[input]"r"(input),[qmul]"r"((uint64_t)qmul),
          [ff_pw_1]"f"(ff_pw_1)
        : "$9","$10","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16",
          "$f18"
    );
}

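/* Dequantize and transform the eight chroma DC coefficients of a 4:2:2
 * macroblock (plain C). */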
void ff_h264_chroma422_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
{
    int temp[8];
    int t[8];

    temp[0] = block[0]  + block[16];
    temp[1] = block[0]  - block[16];
    temp[2] = block[32] + block[48];
    temp[3] = block[32] - block[48];
    temp[4] = block[64] + block[80];
    temp[5] = block[64] - block[80];
    temp[6] = block[96] + block[112];
    temp[7] = block[96] - block[112];

    t[0] = temp[0] + temp[4] + temp[2] + temp[6];
    t[1] = temp[0] - temp[4] + temp[2] - temp[6];
    t[2] = temp[0] - temp[4] - temp[2] + temp[6];
    t[3] = temp[0] + temp[4] - temp[2] - temp[6];
    t[4] = temp[1] + temp[5] + temp[3] + temp[7];
    t[5] = temp[1] - temp[5] + temp[3] - temp[7];
    t[6] = temp[1] - temp[5] - temp[3] + temp[7];
    t[7] = temp[1] + temp[5] - temp[3] - temp[7];

    block[  0] = (t[0]*qmul + 128) >> 8;
    block[ 32] = (t[1]*qmul + 128) >> 8;
    block[ 64] = (t[2]*qmul + 128) >> 8;
    block[ 96] = (t[3]*qmul + 128) >> 8;
    block[ 16] = (t[4]*qmul + 128) >> 8;
    block[ 48] = (t[5]*qmul + 128) >> 8;
    block[ 80] = (t[6]*qmul + 128) >> 8;
    block[112] = (t[7]*qmul + 128) >> 8;
}

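/* Dequantize and transform the four chroma DC coefficients (4:2:0). */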
void ff_h264_chroma_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
{
    int a, b, c, d;

    d = block[0] - block[16];
    a = block[0] + block[16];
    b = block[32] - block[48];
    c = block[32] + block[48];
    block[0]  = ((a+c)*qmul) >> 7;
    block[16] = ((d+b)*qmul) >> 7;
    block[32] = ((a-c)*qmul) >> 7;
    block[48] = ((d-b)*qmul) >> 7;
}

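/* Weighted prediction, 16 pixels per row:
 * dst = clip1((dst * weight + rounded offset) >> log2_denom). */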
void ff_h264_weight_pixels16_8_mmi(uint8_t *block, int stride,
        int height, int log2_denom, int weight, int offset)
{
    int y;

    offset <<= log2_denom;

    if (log2_denom)
        offset += 1 << (log2_denom - 1);

    for (y=0; y<height; y++, block+=stride) {
        __asm__ volatile (
            "ldc1 $f2, %0 \r\n"
            "ldc1 $f4, %1 \r\n"
            "dmtc1 $0, $f20 \r\n"
            "mtc1 %2, $f6 \r\n"
            "mtc1 %3, $f8 \r\n"
            "mtc1 %4, $f10 \r\n"
            "pshufh $f6, $f6, $f20 \r\n"
            "pshufh $f8, $f8, $f20 \r\n"
            "punpckhbh $f14, $f2, $f20 \r\n"
            "punpckhbh $f16, $f4, $f20 \r\n"
            "punpcklbh $f2, $f2, $f20 \r\n"
            "punpcklbh $f4, $f4, $f20 \r\n"
            "pmullh $f14, $f14, $f6 \r\n"
            "pmullh $f16, $f16, $f6 \r\n"
            "pmullh $f2, $f2, $f6 \r\n"
            "pmullh $f4, $f4, $f6 \r\n"
            "paddsh $f14, $f14, $f8 \r\n"
            "paddsh $f16, $f16, $f8 \r\n"
            "paddsh $f2, $f2, $f8 \r\n"
            "paddsh $f4, $f4, $f8 \r\n"
            "psrah $f14, $f14, $f10 \r\n"
            "psrah $f16, $f16, $f10 \r\n"
            "psrah $f2, $f2, $f10 \r\n"
            "psrah $f4, $f4, $f10 \r\n"
            "packushb $f2, $f2, $f14 \r\n"
            "packushb $f4, $f4, $f16 \r\n"
            "sdc1 $f2, %0 \r\n"
            "sdc1 $f4, %1 \r\n"
            : "=m"(*block),"=m"(*(block + 8))
            : "r"(weight),"r"(offset),"r"(log2_denom)
            : "$f2","$f4","$f6","$f8","$f10","$f14","$f16","$f20"
        );
    }
}

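/* Bidirectional weighted prediction, 16 pixels per row:
 * dst = clip1((dst*weightd + src*weights + offset) >> (log2_denom + 1)). */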
void ff_h264_biweight_pixels16_8_mmi(uint8_t *dst, uint8_t *src,
        int stride, int height, int log2_denom, int weightd, int weights,
        int offset)
{
    int y;

    offset = ((offset + 1) | 1) << log2_denom;

    for (y=0; y<height; y++, dst+=stride, src+=stride) {
        __asm__ volatile (
            "ldc1 $f2, %2 \r\n"
            "ldc1 $f4, %3 \r\n"
            "dmtc1 $0, $f20 \r\n"
            "mtc1 %6, $f6 \r\n"
            "mtc1 %7, $f8 \r\n"
            "mtc1 %8, $f10 \r\n"
            "mtc1 %9, $f12 \r\n"
            "pshufh $f6, $f6, $f20 \r\n"
            "pshufh $f8, $f8, $f20 \r\n"
            "pshufh $f10, $f10, $f20 \r\n"
            "punpckhbh $f14, $f2, $f20 \r\n"
            "punpckhbh $f16, $f4, $f20 \r\n"
            "punpcklbh $f2, $f2, $f20 \r\n"
            "punpcklbh $f4, $f4, $f20 \r\n"
            "pmullh $f14, $f14, $f6 \r\n"
            "pmullh $f16, $f16, $f8 \r\n"
            "pmullh $f2, $f2, $f6 \r\n"
            "pmullh $f4, $f4, $f8 \r\n"
            "paddsh $f14, $f14, $f10 \r\n"
            "paddsh $f2, $f2, $f10 \r\n"
            "paddsh $f14, $f14, $f16 \r\n"
            "paddsh $f2, $f2, $f4 \r\n"
            "psrah $f14, $f14, $f12 \r\n"
            "psrah $f2, $f2, $f12 \r\n"
            "packushb $f2, $f2, $f14 \r\n"
            "sdc1 $f2, %0 \r\n"
            "ldc1 $f2, %4 \r\n"
            "ldc1 $f4, %5 \r\n"
            "punpckhbh $f14, $f2, $f20 \r\n"
            "punpckhbh $f16, $f4, $f20 \r\n"
            "punpcklbh $f2, $f2, $f20 \r\n"
            "punpcklbh $f4, $f4, $f20 \r\n"
            "pmullh $f14, $f14, $f6 \r\n"
            "pmullh $f16, $f16, $f8 \r\n"
            "pmullh $f2, $f2, $f6 \r\n"
            "pmullh $f4, $f4, $f8 \r\n"
            "paddsh $f14, $f14, $f10 \r\n"
            "paddsh $f2, $f2, $f10 \r\n"
            "paddsh $f14, $f14, $f16 \r\n"
            "paddsh $f2, $f2, $f4 \r\n"
            "psrah $f14, $f14, $f12 \r\n"
            "psrah $f2, $f2, $f12 \r\n"
            "packushb $f2, $f2, $f14 \r\n"
            "sdc1 $f2, %1 \r\n"
            : "=m"(*dst),"=m"(*(dst+8))
            : "m"(*src),"m"(*dst),"m"(*(src+8)),"m"(*(dst+8)),
              "r"(weights),"r"(weightd),"r"(offset),"r"(log2_denom+1)
            : "$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16","$f20"
        );
    }
}

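/* Weighted prediction, 8 pixels per row. */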
void ff_h264_weight_pixels8_8_mmi(uint8_t *block, int stride, int height,
        int log2_denom, int weight, int offset)
{
    int y;

    offset <<= log2_denom;

    if (log2_denom)
        offset += 1 << (log2_denom - 1);

    for (y=0; y<height; y++, block+=stride) {
        __asm__ volatile (
            "ldc1 $f2, %0 \r\n"
            "mtc1 %1, $f6 \r\n"
            "mtc1 %2, $f8 \r\n"
            "mtc1 %3, $f10 \r\n"
            "dmtc1 $0, $f20 \r\n"
            "pshufh $f6, $f6, $f20 \r\n"
            "pshufh $f8, $f8, $f20 \r\n"
            "punpckhbh $f14, $f2, $f20 \r\n"
            "punpcklbh $f2, $f2, $f20 \r\n"
            "pmullh $f14, $f14, $f6 \r\n"
            "pmullh $f2, $f2, $f6 \r\n"
            "paddsh $f14, $f14, $f8 \r\n"
            "paddsh $f2, $f2, $f8 \r\n"
            "psrah $f14, $f14, $f10 \r\n"
            "psrah $f2, $f2, $f10 \r\n"
            "packushb $f2, $f2, $f14 \r\n"
            "sdc1 $f2, %0 \r\n"
            : "=m"(*block)
            : "r"(weight),"r"(offset),"r"(log2_denom)
            : "$f2","$f6","$f8","$f10","$f14","$f20"
        );
    }
}

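/* Bidirectional weighted prediction, 8 pixels per row. */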
void ff_h264_biweight_pixels8_8_mmi(uint8_t *dst, uint8_t *src,
        int stride, int height, int log2_denom, int weightd, int weights,
        int offset)
{
    int y;

    offset = ((offset + 1) | 1) << log2_denom;

    for (y=0; y<height; y++, dst+=stride, src+=stride) {
        __asm__ volatile (
            "ldc1 $f2, %1 \r\n"
            "ldc1 $f4, %2 \r\n"
            "dmtc1 $0, $f20 \r\n"
            "mtc1 %3, $f6 \r\n"
            "mtc1 %4, $f8 \r\n"
            "mtc1 %5, $f10 \r\n"
            "mtc1 %6, $f12 \r\n"
            "pshufh $f6, $f6, $f20 \r\n"
            "pshufh $f8, $f8, $f20 \r\n"
            "pshufh $f10, $f10, $f20 \r\n"
            "punpckhbh $f14, $f2, $f20 \r\n"
            "punpckhbh $f16, $f4, $f20 \r\n"
            "punpcklbh $f2, $f2, $f20 \r\n"
            "punpcklbh $f4, $f4, $f20 \r\n"
            "pmullh $f14, $f14, $f6 \r\n"
            "pmullh $f16, $f16, $f8 \r\n"
            "pmullh $f2, $f2, $f6 \r\n"
            "pmullh $f4, $f4, $f8 \r\n"
            "paddsh $f14, $f14, $f10 \r\n"
            "paddsh $f2, $f2, $f10 \r\n"
            "paddsh $f14, $f14, $f16 \r\n"
            "paddsh $f2, $f2, $f4 \r\n"
            "psrah $f14, $f14, $f12 \r\n"
            "psrah $f2, $f2, $f12 \r\n"
            "packushb $f2, $f2, $f14 \r\n"
            "sdc1 $f2, %0 \r\n"
            : "=m"(*dst)
            : "m"(*src),"m"(*dst),"r"(weights),
              "r"(weightd),"r"(offset),"r"(log2_denom+1)
            : "$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16","$f20"
        );
    }
}

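/* Weighted prediction, 4 pixels per row. */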
void ff_h264_weight_pixels4_8_mmi(uint8_t *block, int stride, int height,
        int log2_denom, int weight, int offset)
{
    int y;

    offset <<= log2_denom;

    if (log2_denom)
        offset += 1 << (log2_denom - 1);

    for (y=0; y<height; y++, block+=stride) {
        __asm__ volatile (
            "lwc1 $f2, %0 \r\n"
            "mtc1 %1, $f6 \r\n"
            "mtc1 %2, $f8 \r\n"
            "mtc1 %3, $f10 \r\n"
            "dmtc1 $0, $f20 \r\n"
            "pshufh $f6, $f6, $f20 \r\n"
            "pshufh $f8, $f8, $f20 \r\n"
            "punpcklbh $f2, $f2, $f20 \r\n"
            "pmullh $f2, $f2, $f6 \r\n"
            "paddsh $f2, $f2, $f8 \r\n"
            "psrah $f2, $f2, $f10 \r\n"
            "packushb $f2, $f2, $f20 \r\n"
            "swc1 $f2, %0 \r\n"
            : "=m"(*block)
            : "r"(weight),"r"(offset),"r"(log2_denom)
            : "$f2","$f6","$f8","$f10","$f20"
        );
    }
}

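/* Bidirectional weighted prediction, 4 pixels per row. */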
void ff_h264_biweight_pixels4_8_mmi(uint8_t *dst, uint8_t *src,
        int stride, int height, int log2_denom, int weightd, int weights,
        int offset)
{
    int y;

    offset = ((offset + 1) | 1) << log2_denom;

    for (y=0; y<height; y++, dst+=stride, src+=stride) {
        __asm__ volatile (
            "lwc1 $f2, %1 \r\n"
            "lwc1 $f4, %2 \r\n"
            "dmtc1 $0, $f20 \r\n"
            "mtc1 %3, $f6 \r\n"
            "mtc1 %4, $f8 \r\n"
            "mtc1 %5, $f10 \r\n"
            "mtc1 %6, $f12 \r\n"
            "pshufh $f6, $f6, $f20 \r\n"
            "pshufh $f8, $f8, $f20 \r\n"
            "pshufh $f10, $f10, $f20 \r\n"
            "punpcklbh $f2, $f2, $f20 \r\n"
            "punpcklbh $f4, $f4, $f20 \r\n"
            "pmullh $f2, $f2, $f6 \r\n"
            "pmullh $f4, $f4, $f8 \r\n"
            "paddsh $f2, $f2, $f10 \r\n"
            "paddsh $f2, $f2, $f4 \r\n"
            "psrah $f2, $f2, $f12 \r\n"
            "packushb $f2, $f2, $f20 \r\n"
            "swc1 $f2, %0 \r\n"
            : "=m"(*dst)
            : "m"(*src),"m"(*dst),"r"(weights),
              "r"(weightd),"r"(offset),"r"(log2_denom+1)
            : "$f2","$f4","$f6","$f8","$f10","$f12","$f20"
        );
    }
}

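/* Shared chroma deblocking core (normal filter). Expects p1/p0/q0/q1
 * preloaded in $f0/$f2/$f4/$f6 by the caller and leaves the filtered
 * p0/q0 in $f2/$f4. */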
static inline void chroma_inter_body_mmi(uint8_t *pix, int stride,
        int alpha, int beta, int8_t *tc0)
{
    __asm__ volatile (
        "xor $f16, $f16, $f16 \r\n"
        "mtc1 %[alpha], $f8 \r\n"
        "mtc1 %[beta], $f10 \r\n"
        "pshufh $f8, $f8, $f16 \r\n"
        "pshufh $f10, $f10, $f16 \r\n"
        "packushb $f8, $f8, $f8 \r\n"
        "packushb $f10, $f10, $f10 \r\n"
        "psubusb $f12, $f4, $f2 \r\n"
        "psubusb $f14, $f2, $f4 \r\n"
        "or $f14, $f14, $f12 \r\n"
        "psubusb $f14, $f14, $f8 \r\n"
        "psubusb $f12, $f2, $f0 \r\n"
        "psubusb $f8, $f0, $f2 \r\n"
        "or $f8, $f8, $f12 \r\n"
        "psubusb $f8, $f8, $f10 \r\n"
        "or $f14, $f14, $f8 \r\n"
        "psubusb $f12, $f4, $f6 \r\n"
        "psubusb $f8, $f6, $f4 \r\n"
        "or $f8, $f8, $f12 \r\n"
        "psubusb $f8, $f8, $f10 \r\n"
        "or $f14, $f14, $f8 \r\n"
        "xor $f12, $f12, $f12 \r\n"
        "pcmpeqb $f14, $f14, $f12 \r\n"
        "lwc1 $f12, 0x0(%[tc0]) \r\n"
        "punpcklbh $f12, $f12, $f12 \r\n"
        "and $f14, $f14, $f12 \r\n"
        "pcmpeqb $f8, $f8, $f8 \r\n"
        "xor $f10, $f2, $f4 \r\n"
        "xor $f6, $f6, $f8 \r\n"
        "and $f10, $f10, %[ff_pb_1] \r\n"
        "pavgb $f6, $f6, $f0 \r\n"
        "xor $f8, $f8, $f2 \r\n"
        "pavgb $f6, $f6, %[ff_pb_3] \r\n"
        "pavgb $f8, $f8, $f4 \r\n"
        "pavgb $f6, $f6, $f10 \r\n"
        "paddusb $f6, $f6, $f8 \r\n"
        "psubusb $f12, %[ff_pb_A1], $f6 \r\n"
        "psubusb $f6, $f6, %[ff_pb_A1] \r\n"
        "pminub $f12, $f12, $f14 \r\n"
        "pminub $f6, $f6, $f14 \r\n"
        "psubusb $f2, $f2, $f12 \r\n"
        "psubusb $f4, $f4, $f6 \r\n"
        "paddusb $f2, $f2, $f6 \r\n"
        "paddusb $f4, $f4, $f12 \r\n"
        ::[pix]"r"(pix),[stride]"r"((int64_t)stride),
          [alpha]"r"((int64_t)alpha),[beta]"r"((int64_t)beta),[tc0]"r"(tc0),
          [ff_pb_1]"f"(ff_pb_1),[ff_pb_3]"f"(ff_pb_3),[ff_pb_A1]"f"(ff_pb_A1)
        : "$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16"
    );
}

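/* Shared chroma deblocking core for intra edges (no tc0); same register
 * convention as chroma_inter_body_mmi. */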
static inline void chroma_intra_body_mmi(uint8_t *pix, int stride,
        int alpha, int beta)
{
    __asm__ volatile (
        "xor $f16, $f16, $f16 \r\n"
        "mtc1 %[alpha], $f8 \r\n"
        "mtc1 %[beta], $f10 \r\n"
        "pshufh $f8, $f8, $f16 \r\n"
        "pshufh $f10, $f10, $f16 \r\n"
        "packushb $f8, $f8, $f8 \r\n"
        "packushb $f10, $f10, $f10 \r\n"
        "psubusb $f12, $f4, $f2 \r\n"
        "psubusb $f14, $f2, $f4 \r\n"
        "or $f14, $f14, $f12 \r\n"
        "psubusb $f14, $f14, $f8 \r\n"
        "psubusb $f12, $f2, $f0 \r\n"
        "psubusb $f8, $f0, $f2 \r\n"
        "or $f8, $f8, $f12 \r\n"
        "psubusb $f8, $f8, $f10 \r\n"
        "or $f14, $f14, $f8 \r\n"
        "psubusb $f12, $f4, $f6 \r\n"
        "psubusb $f8, $f6, $f4 \r\n"
        "or $f8, $f8, $f12 \r\n"
        "psubusb $f8, $f8, $f10 \r\n"
        "or $f14, $f14, $f8 \r\n"
        "xor $f12, $f12, $f12 \r\n"
        "pcmpeqb $f14, $f14, $f12 \r\n"
        "mov.d $f10, $f2 \r\n"
        "mov.d $f12, $f4 \r\n"
        "xor $f8, $f2, $f6 \r\n"
        "and $f8, $f8, %[ff_pb_1] \r\n"
        "pavgb $f2, $f2, $f6 \r\n"
        "psubusb $f2, $f2, $f8 \r\n"
        "pavgb $f2, $f2, $f0 \r\n"
        "xor $f8, $f4, $f0 \r\n"
        "and $f8, $f8, %[ff_pb_1] \r\n"
        "pavgb $f4, $f4, $f0 \r\n"
        "psubusb $f4, $f4, $f8 \r\n"
        "pavgb $f4, $f4, $f6 \r\n"
        "psubb $f2, $f2, $f10 \r\n"
        "psubb $f4, $f4, $f12 \r\n"
        "and $f2, $f2, $f14 \r\n"
        "and $f4, $f4, $f14 \r\n"
        "paddb $f2, $f2, $f10 \r\n"
        "paddb $f4, $f4, $f12 \r\n"
        ::[pix]"r"(pix),[stride]"r"((int64_t)stride),
          [alpha]"r"((int64_t)alpha),[beta]"r"((int64_t)beta),
          [ff_pb_1]"f"(ff_pb_1)
        : "$f0","$f2","$f4","$f8","$f10","$f12","$f14","$f16"
    );
}

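/* Normal (tc0-driven) vertical luma deblocking across an 8-pixel edge. */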
void ff_deblock_v8_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
        int8_t *tc0)
{
    __asm__ volatile (
        "daddu $8, %[stride], %[stride] \r\n"
        "xor $f16, $f16, $f16 \r\n"
        "daddu $9, %[stride], $8 \r\n"
        "daddiu %[alpha], %[alpha], -0x1 \r\n"
        "dsubu $9, $0, $9 \r\n"
        "daddiu %[beta], %[beta], -0x1 \r\n"
        "daddu $9, $9, %[pix] \r\n"
        "ldc1 $f4, 0x0(%[pix]) \r\n"
        "gsldxc1 $f0, 0x0($9, %[stride]) \r\n"
        "gsldxc1 $f2, 0x0($9, $8) \r\n"
        "gsldxc1 $f6, 0x0(%[pix], %[stride]) \r\n"
        "mtc1 %[alpha], $f8 \r\n"
        "mtc1 %[beta], $f10 \r\n"
        "pshufh $f8, $f8, $f16 \r\n"
        "pshufh $f10, $f10, $f16 \r\n"
        "packushb $f8, $f8, $f8 \r\n"
        "packushb $f10, $f10, $f10 \r\n"
        "psubusb $f12, $f4, $f2 \r\n"
        "psubusb $f14, $f2, $f4 \r\n"
        "or $f14, $f14, $f12 \r\n"
        "psubusb $f12, $f2, $f0 \r\n"
        "psubusb $f14, $f14, $f8 \r\n"
        "psubusb $f8, $f0, $f2 \r\n"
        "or $f8, $f8, $f12 \r\n"
        "psubusb $f12, $f4, $f6 \r\n"
        "psubusb $f8, $f8, $f10 \r\n"
        "or $f14, $f14, $f8 \r\n"
        "psubusb $f8, $f6, $f4 \r\n"
        "or $f8, $f8, $f12 \r\n"
        "psubusb $f8, $f8, $f10 \r\n"
        "or $f14, $f14, $f8 \r\n"
        "pcmpeqb $f14, $f14, $f16 \r\n"
        "pcmpeqb $f6, $f6, $f6 \r\n"
        "gslwlc1 $f8, 0x3(%[tc0]) \r\n"
        "gslwrc1 $f8, 0x0(%[tc0]) \r\n"
        "punpcklbh $f8, $f8, $f8 \r\n"
        "punpcklbh $f18, $f8, $f8 \r\n"
        "pcmpgtb $f8, $f18, $f6 \r\n"
        "ldc1 $f6, 0x0($9) \r\n"
        "and $f20, $f8, $f14 \r\n"
        "psubusb $f14, $f6, $f2 \r\n"
        "psubusb $f12, $f2, $f6 \r\n"
        "psubusb $f14, $f14, $f10 \r\n"
        "psubusb $f12, $f12, $f10 \r\n"
        "pcmpeqb $f12, $f12, $f14 \r\n"
        "and $f12, $f12, $f20 \r\n"
        "and $f8, $f20, $f18 \r\n"
        "psubb $f14, $f8, $f12 \r\n"
        "and $f12, $f12, $f8 \r\n"
        "pavgb $f8, $f2, $f4 \r\n"
        "ldc1 $f22, 0x0($9) \r\n"
        "pavgb $f6, $f6, $f8 \r\n"
        "xor $f8, $f8, $f22 \r\n"
        "and $f8, $f8, %[ff_pb_1] \r\n"
        "psubusb $f6, $f6, $f8 \r\n"
        "psubusb $f8, $f0, $f12 \r\n"
        "paddusb $f12, $f12, $f0 \r\n"
        "pmaxub $f6, $f6, $f8 \r\n"
        "pminub $f6, $f6, $f12 \r\n"
        "gssdxc1 $f6, 0x0($9, %[stride]) \r\n"
        "gsldxc1 $f8, 0x0(%[pix], $8) \r\n"
        "psubusb $f6, $f8, $f4 \r\n"
        "psubusb $f12, $f4, $f8 \r\n"
        "psubusb $f6, $f6, $f10 \r\n"
        "psubusb $f12, $f12, $f10 \r\n"
        "pcmpeqb $f12, $f12, $f6 \r\n"
        "and $f12, $f12, $f20 \r\n"
        "psubb $f14, $f14, $f12 \r\n"
        "and $f10, $f18, $f12 \r\n"
        "gsldxc1 $f6, 0x0(%[pix], %[stride]) \r\n"
        "pavgb $f12, $f2, $f4 \r\n"
        "gsldxc1 $f22, 0x0(%[pix], $8) \r\n"
        "pavgb $f8, $f8, $f12 \r\n"
        "xor $f12, $f12, $f22 \r\n"
        "and $f12, $f12, %[ff_pb_1] \r\n"
        "psubusb $f8, $f8, $f12 \r\n"
        "psubusb $f12, $f6, $f10 \r\n"
        "paddusb $f10, $f10, $f6 \r\n"
        "pmaxub $f8, $f8, $f12 \r\n"
        "pminub $f8, $f8, $f10 \r\n"
        "gssdxc1 $f8, 0x0(%[pix], %[stride]) \r\n"
        "xor $f10, $f2, $f4 \r\n"
        "pcmpeqb $f8, $f8, $f8 \r\n"
        "and $f10, $f10, %[ff_pb_1] \r\n"
        "xor $f6, $f6, $f8 \r\n"
        "xor $f8, $f8, $f2 \r\n"
        "pavgb $f6, $f6, $f0 \r\n"
        "pavgb $f6, $f6, %[ff_pb_3] \r\n"
        "pavgb $f8, $f8, $f4 \r\n"
        "pavgb $f6, $f6, $f10 \r\n"
        "paddusb $f6, $f6, $f8 \r\n"
        "psubusb $f12, %[ff_pb_A1], $f6 \r\n"
        "psubusb $f6, $f6, %[ff_pb_A1] \r\n"
        "pminub $f12, $f12, $f14 \r\n"
        "pminub $f6, $f6, $f14 \r\n"
        "psubusb $f2, $f2, $f12 \r\n"
        "psubusb $f4, $f4, $f6 \r\n"
        "paddusb $f2, $f2, $f6 \r\n"
        "paddusb $f4, $f4, $f12 \r\n"
        "gssdxc1 $f2, 0x0($9, $8) \r\n"
        "sdc1 $f4, 0x0(%[pix]) \r\n"
        ::[pix]"r"(pix),[stride]"r"((int64_t)stride),
          [alpha]"r"((int64_t)alpha),[beta]"r"((int64_t)beta),[tc0]"r"(tc0),
          [ff_pb_1]"f"(ff_pb_1),[ff_pb_3]"f"(ff_pb_3),[ff_pb_A1]"f"(ff_pb_A1)
        : "$8","$9","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16",
          "$f18","$f20","$f22"
    );
}

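/* Intra (strong) vertical luma filter over 8 pixels; exits early through
 * label 1 when alpha or beta is zero. Spills intermediates to the local
 * stack[] scratch area. */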
void ff_deblock_v8_luma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
        int beta)
{
1509  uint64_t stack[0xa];
1510 
1511  __asm__ volatile (
1512  "ori $8, $0, 0x1 \r\n"
1513  "xor $f30, $f30, $f30 \r\n"
1514  "dmtc1 $8, $f16 \r\n"
1515  "dsll $8, %[stride], 2 \r\n"
1516  "daddu $10, %[stride], %[stride] \r\n"
1517  "daddiu %[alpha], %[alpha], -0x1 \r\n"
1518  "dsll $f20, $f16, $f16 \r\n"
1519  "bltz %[alpha], 1f \r\n"
1520  "daddu $9, $10, %[stride] \r\n"
1521  "daddiu %[beta], %[beta], -0x1 \r\n"
1522  "bltz %[beta], 1f \r\n"
1523  "dsubu $8, $0, $8 \r\n"
1524  "daddu $8, $8, %[pix] \r\n"
1525  "ldc1 $f4, 0x0(%[pix]) \r\n"
1526  "gsldxc1 $f0, 0x0($8, $10) \r\n"
1527  "gsldxc1 $f2, 0x0($8, $9) \r\n"
1528  "gsldxc1 $f6, 0x0(%[pix], %[stride]) \r\n"
1529  "mtc1 %[alpha], $f8 \r\n"
1530  "mtc1 %[beta], $f10 \r\n"
1531  "pshufh $f8, $f8, $f30 \r\n"
1532  "pshufh $f10, $f10, $f30 \r\n"
1533  "packushb $f8, $f8, $f8 \r\n"
1534  "psubusb $f12, $f4, $f2 \r\n"
1535  "psubusb $f14, $f2, $f4 \r\n"
1536  "packushb $f10, $f10, $f10 \r\n"
1537  "or $f14, $f14, $f12 \r\n"
1538  "sdc1 $f8, 0x10+%[stack] \r\n"
1539  "psubusb $f14, $f14, $f8 \r\n"
1540  "psubusb $f12, $f2, $f0 \r\n"
1541  "psubusb $f8, $f0, $f2 \r\n"
1542  "or $f8, $f8, $f12 \r\n"
1543  "psubusb $f8, $f8, $f10 \r\n"
1544  "or $f14, $f14, $f8 \r\n"
1545  "psubusb $f12, $f4, $f6 \r\n"
1546  "psubusb $f8, $f6, $f4 \r\n"
1547  "or $f8, $f8, $f12 \r\n"
1548  "psubusb $f8, $f8, $f10 \r\n"
1549  "or $f14, $f14, $f8 \r\n"
1550  "xor $f12, $f12, $f12 \r\n"
1551  "ldc1 $f8, 0x10+%[stack] \r\n"
1552  "pcmpeqb $f14, $f14, $f12 \r\n"
1553  "sdc1 $f14, 0x20+%[stack] \r\n"
1554  "pavgb $f8, $f8, $f30 \r\n"
1555  "psubusb $f14, $f4, $f2 \r\n"
1556  "pavgb $f8, $f8, %[ff_pb_1] \r\n"
1557  "psubusb $f12, $f2, $f4 \r\n"
1558  "psubusb $f14, $f14, $f8 \r\n"
1559  "psubusb $f12, $f12, $f8 \r\n"
1560  "ldc1 $f28, 0x20+%[stack] \r\n"
1561  "pcmpeqb $f12, $f12, $f14 \r\n"
1562  "and $f12, $f12, $f28 \r\n"
1563  "gsldxc1 $f28, 0x0($8, %[stride]) \r\n"
1564  "psubusb $f14, $f28, $f2 \r\n"
1565  "psubusb $f8, $f2, $f28 \r\n"
1566  "psubusb $f14, $f14, $f10 \r\n"
1567  "psubusb $f8, $f8, $f10 \r\n"
1568  "pcmpeqb $f8, $f8, $f14 \r\n"
1569  "and $f8, $f8, $f12 \r\n"
1570  "gsldxc1 $f26, 0x0(%[pix], $10) \r\n"
1571  "sdc1 $f8, 0x30+%[stack] \r\n"
1572  "psubusb $f14, $f26, $f4 \r\n"
1573  "psubusb $f8, $f4, $f26 \r\n"
1574  "psubusb $f14, $f14, $f10 \r\n"
1575  "psubusb $f8, $f8, $f10 \r\n"
1576  "pcmpeqb $f8, $f8, $f14 \r\n"
1577  "and $f8, $f8, $f12 \r\n"
1578  "sdc1 $f8, 0x40+%[stack] \r\n"
1579  "pavgb $f8, $f28, $f0 \r\n"
1580  "pavgb $f10, $f2, $f4 \r\n"
1581  "pavgb $f8, $f8, $f10 \r\n"
1582  "sdc1 $f10, 0x10+%[stack] \r\n"
1583  "paddb $f12, $f28, $f0 \r\n"
1584  "paddb $f14, $f2, $f4 \r\n"
1585  "paddb $f12, $f12, $f14 \r\n"
1586  "mov.d $f14, $f12 \r\n"
1587  "sdc1 $f12, 0x0+%[stack] \r\n"
1588  "psrlh $f12, $f12, $f16 \r\n"
1589  "pavgb $f12, $f12, $f30 \r\n"
1590  "xor $f12, $f12, $f8 \r\n"
1591  "and $f12, $f12, %[ff_pb_1] \r\n"
1592  "psubb $f8, $f8, $f12 \r\n"
1593  "pavgb $f10, $f28, $f6 \r\n"
1594  "psubb $f12, $f28, $f6 \r\n"
1595  "paddb $f14, $f14, $f14 \r\n"
1596  "psubb $f14, $f14, $f12 \r\n"
1597  "and $f12, $f12, %[ff_pb_1] \r\n"
1598  "psubb $f10, $f10, $f12 \r\n"
1599  "ldc1 $f24, 0x10+%[stack] \r\n"
1600  "pavgb $f10, $f10, $f0 \r\n"
1601  "psrlh $f14, $f14, $f20 \r\n"
1602  "pavgb $f10, $f10, $f24 \r\n"
1603  "pavgb $f14, $f14, $f30 \r\n"
1604  "xor $f14, $f14, $f10 \r\n"
1605  "and $f14, $f14, %[ff_pb_1] \r\n"
1606  "psubb $f10, $f10, $f14 \r\n"
1607  "xor $f14, $f2, $f6 \r\n"
1608  "pavgb $f12, $f2, $f6 \r\n"
1609  "and $f14, $f14, %[ff_pb_1] \r\n"
1610  "psubb $f12, $f12, $f14 \r\n"
1611  "ldc1 $f24, 0x30+%[stack] \r\n"
1612  "pavgb $f12, $f12, $f0 \r\n"
1613  "ldc1 $f22, 0x20+%[stack] \r\n"
1614  "xor $f10, $f10, $f12 \r\n"
1615  "xor $f12, $f12, $f2 \r\n"
1616  "and $f10, $f10, $f24 \r\n"
1617  "and $f12, $f12, $f22 \r\n"
1618  "xor $f10, $f10, $f12 \r\n"
1619  "xor $f10, $f10, $f2 \r\n"
1620  "gssdxc1 $f10, 0x0($8, $9) \r\n"
1621  "ldc1 $f10, 0x0($8) \r\n"
1622  "paddb $f12, $f28, $f10 \r\n"
1623  "pavgb $f10, $f10, $f28 \r\n"
1624  "ldc1 $f22, 0x0+%[stack] \r\n"
1625  "pavgb $f10, $f10, $f8 \r\n"
1626  "paddb $f12, $f12, $f12 \r\n"
1627  "paddb $f12, $f12, $f22 \r\n"
1628  "psrlh $f12, $f12, $f20 \r\n"
1629  "pavgb $f12, $f12, $f30 \r\n"
1630  "xor $f12, $f12, $f10 \r\n"
1631  "and $f12, $f12, %[ff_pb_1] \r\n"
1632  "ldc1 $f22, 0x30+%[stack] \r\n"
1633  "psubb $f10, $f10, $f12 \r\n"
1634  "xor $f8, $f8, $f0 \r\n"
1635  "xor $f10, $f10, $f28 \r\n"
1636  "and $f8, $f8, $f22 \r\n"
1637  "and $f10, $f10, $f22 \r\n"
1638  "xor $f8, $f8, $f0 \r\n"
1639  "xor $f10, $f10, $f28 \r\n"
1640  "gssdxc1 $f8, 0x0($8, $10) \r\n"
1641  "gssdxc1 $f10, 0x0($8, %[stride]) \r\n"
1642  "pavgb $f8, $f26, $f6 \r\n"
1643  "pavgb $f10, $f4, $f2 \r\n"
1644  "pavgb $f8, $f8, $f10 \r\n"
1645  "sdc1 $f10, 0x10+%[stack] \r\n"
1646  "paddb $f12, $f26, $f6 \r\n"
1647  "paddb $f14, $f4, $f2 \r\n"
1648  "paddb $f12, $f12, $f14 \r\n"
1649  "mov.d $f14, $f12 \r\n"
1650  "sdc1 $f12, 0x0+%[stack] \r\n"
1651  "psrlh $f12, $f12, $f16 \r\n"
1652  "pavgb $f12, $f12, $f30 \r\n"
1653  "xor $f12, $f12, $f8 \r\n"
1654  "and $f12, $f12, %[ff_pb_1] \r\n"
1655  "psubb $f8, $f8, $f12 \r\n"
1656  "pavgb $f10, $f26, $f0 \r\n"
1657  "paddb $f14, $f14, $f14 \r\n"
1658  "psubb $f12, $f26, $f0 \r\n"
1659  "psubb $f14, $f14, $f12 \r\n"
1660  "and $f12, $f12, %[ff_pb_1] \r\n"
1661  "psubb $f10, $f10, $f12 \r\n"
1662  "ldc1 $f22, 0x10+%[stack] \r\n"
1663  "pavgb $f10, $f10, $f6 \r\n"
1664  "pavgb $f10, $f10, $f22 \r\n"
1665  "psrlh $f14, $f14, $f20 \r\n"
1666  "pavgb $f14, $f14, $f30 \r\n"
1667  "xor $f14, $f14, $f10 \r\n"
1668  "and $f14, $f14, %[ff_pb_1] \r\n"
1669  "psubb $f10, $f10, $f14 \r\n"
1670  "xor $f14, $f4, $f0 \r\n"
1671  "pavgb $f12, $f4, $f0 \r\n"
1672  "and $f14, $f14, %[ff_pb_1] \r\n"
1673  "ldc1 $f22, 0x40+%[stack] \r\n"
1674  "psubb $f12, $f12, $f14 \r\n"
1675  "ldc1 $f24, 0x20+%[stack] \r\n"
1676  "pavgb $f12, $f12, $f6 \r\n"
1677  "xor $f10, $f10, $f12 \r\n"
1678  "xor $f12, $f12, $f4 \r\n"
1679  "and $f10, $f10, $f22 \r\n"
1680  "and $f12, $f12, $f24 \r\n"
1681  "xor $f10, $f10, $f12 \r\n"
1682  "xor $f10, $f10, $f4 \r\n"
1683  "sdc1 $f10, 0x0(%[pix]) \r\n"
1684  "gsldxc1 $f10, 0x0(%[pix], $9) \r\n"
1685  "paddb $f12, $f26, $f10 \r\n"
1686  "pavgb $f10, $f10, $f26 \r\n"
1687  "ldc1 $f22, 0x0+%[stack] \r\n"
1688  "pavgb $f10, $f10, $f8 \r\n"
1689  "paddb $f12, $f12, $f12 \r\n"
1690  "paddb $f12, $f12, $f22 \r\n"
1691  "psrlh $f12, $f12, $f20 \r\n"
1692  "pavgb $f12, $f12, $f30 \r\n"
1693  "xor $f12, $f12, $f10 \r\n"
1694  "and $f12, $f12, %[ff_pb_1] \r\n"
1695  "ldc1 $f22, 0x40+%[stack] \r\n"
1696  "psubb $f10, $f10, $f12 \r\n"
1697  "xor $f8, $f8, $f6 \r\n"
1698  "xor $f10, $f10, $f26 \r\n"
1699  "and $f8, $f8, $f22 \r\n"
1700  "and $f10, $f10, $f22 \r\n"
1701  "xor $f8, $f8, $f6 \r\n"
1702  "xor $f10, $f10, $f26 \r\n"
1703  "gssdxc1 $f8, 0x0(%[pix], %[stride]) \r\n"
1704  "gssdxc1 $f10, 0x0(%[pix], $10) \r\n"
1705  "1: \r\n"
1706  ::[pix]"r"(pix),[stride]"r"((int64_t)stride),
1707  [alpha]"r"((int64_t)alpha),[beta]"r"((int64_t)beta),
1708  [stack]"m"(stack[0]),[ff_pb_1]"f"(ff_pb_1)
1709  : "$8","$9","$10","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14",
1710  "$f16","$f18","$f20","$f22","$f24","$f26","$f28","$f30"
1711  );
1712 }
1713 
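The tail above closes ff_deblock_v8_luma_intra_8_mmi, the bS=4 ("strong") luma filter. It stays in 8-bit arithmetic throughout: pavgb computes (a+b+1)>>1, and the recurring xor / and-with-ff_pb_1 / psubb pattern subtracts ((a^b)&1) from that to recover the truncating average (a+b)>>1, so the spec's wider sums are built from chained averages without unpacking to 16 bits. For orientation, a minimal scalar sketch of the strong filter it vectorizes (one side only; the q side is symmetric; the helper name and sample layout are illustrative, not part of this file):

    #include <stdint.h>
    #include <stdlib.h>

    /* p[0..3] = p0..p3 walking away from the edge, q[0..1] = q0,q1.
     * A real implementation must compute both sides from the
     * unfiltered samples before storing either. */
    static void strong_filter_p(uint8_t *p, const uint8_t *q,
                                int alpha, int beta)
    {
        if (abs(p[0] - q[0]) >= alpha ||
            abs(p[1] - p[0]) >= beta || abs(q[1] - q[0]) >= beta)
            return;                                  /* edge left alone */

        if (abs(p[2] - p[0]) < beta &&
            abs(p[0] - q[0]) < (alpha >> 2) + 2) {   /* strong path */
            int p0 = p[0], p1 = p[1], p2 = p[2], p3 = p[3];
            p[0] = (p2 + 2*p1 + 2*p0 + 2*q[0] + q[1] + 4) >> 3;
            p[1] = (p2 + p1 + p0 + q[0] + 2) >> 2;
            p[2] = (2*p3 + 3*p2 + p1 + p0 + q[0] + 4) >> 3;
        } else {                                     /* 3-tap fallback */
            p[0] = (2*p[1] + p[0] + q[1] + 2) >> 2;
        }
    }
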
1714 void ff_deblock_v_chroma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
1715  int8_t *tc0)
1716 {
1717  __asm__ volatile (
1718  "daddiu %[alpha], %[alpha], -0x1 \r\n"
1719  "daddiu %[beta], %[beta], -0x1 \r\n"
1720  "or $16, $0, %[pix] \r\n"
1721  "dsubu $16, $16, %[stride] \r\n"
1722  "dsubu $16, $16, %[stride] \r\n"
1723  "ldc1 $f0, 0x0($16) \r\n"
1724  "gsldxc1 $f2, 0x0($16, %[stride]) \r\n"
1725  "ldc1 $f4, 0x0(%[pix]) \r\n"
1726  "gsldxc1 $f6, 0x0(%[pix], %[stride]) \r\n"
1727  : [pix]"+r"(pix),[stride]"+r"(stride),[alpha]"+r"(alpha),
1728  [beta]"+r"(beta)
1729  : [tc0]"r"(tc0)
1730  : "$16","$f2","$f4"
1731  );
1732 
1733  chroma_inter_body_mmi(pix, stride, alpha, beta, tc0);
1734 
1735  __asm__ volatile (
1736  "gssdxc1 $f2, 0x0($16, %[stride]) \r\n"
1737  "sdc1 $f4, 0x0(%[pix]) \r\n"
1738  ::[pix]"r"(pix),[stride]"r"((int64_t)stride)
1739  : "$16","$f2","$f4"
1740  );
1741 }
1742 
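ff_deblock_v_chroma_8_mmi above only loads the four rows around the edge (p1, p0, q0, q1) into $f0-$f6 and delegates the arithmetic to chroma_inter_body_mmi, relying on the callee-saved $16 and on $f2/$f4 surviving the call for the store-back. A scalar sketch of the bS<4 chroma filter that body computes, assuming the standard tc0-clipped delta (helper names are illustrative):

    #include <stdint.h>
    #include <stdlib.h>

    static uint8_t clip_u8(int v) { return v < 0 ? 0 : v > 255 ? 255 : v; }

    static void chroma_filter_pair(uint8_t *p0, uint8_t p1,
                                   uint8_t *q0, uint8_t q1,
                                   int alpha, int beta, int tc0)
    {
        int tc = tc0 + 1;                  /* chroma uses tc0 + 1 */
        int d;

        if (tc0 < 0 ||                     /* tc0 == -1: segment disabled */
            abs(*p0 - *q0) >= alpha ||
            abs(p1 - *p0) >= beta || abs(q1 - *q0) >= beta)
            return;

        d = ((*q0 - *p0) * 4 + (p1 - q1) + 4) >> 3;
        if (d < -tc) d = -tc;              /* clip the delta to +/-tc */
        else if (d > tc) d = tc;
        *p0 = clip_u8(*p0 + d);
        *q0 = clip_u8(*q0 - d);
    }
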
1743 void ff_deblock_v_chroma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
1744  int beta)
1745 {
1746  __asm__ volatile (
1747  "daddiu %[alpha], %[alpha], -0x1 \r\n"
1748  "daddiu %[beta], %[beta], -0x1 \r\n"
1749  "or $16, $0, %[pix] \r\n"
1750  "dsubu $16, $16, %[stride] \r\n"
1751  "dsubu $16, $16, %[stride] \r\n"
1752  "ldc1 $f0, 0x0($16) \r\n"
1753  "gsldxc1 $f2, 0x0($16, %[stride]) \r\n"
1754  "ldc1 $f4, 0x0(%[pix]) \r\n"
1755  "gsldxc1 $f6, 0x0(%[pix], %[stride]) \r\n"
1756  : [pix]"+r"(pix),[stride]"+r"(stride),[alpha]"+r"(alpha),
1757  [beta]"+r"(beta)
1758  ::"$16","$f0","$f2","$f4","$f6"
1759  );
1760 
1761  chroma_intra_body_mmi(pix, stride, alpha, beta);
1762 
1763  __asm__ volatile (
1764  "gssdxc1 $f2, 0x0($16, %[stride]) \r\n"
1765  "sdc1 $f4, 0x0(%[pix]) \r\n"
1766  ::[pix]"r"(pix),[stride]"r"((int64_t)stride)
1767  : "$16","$f2","$f4"
1768  );
1769 }
1770 
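ff_deblock_v_chroma_intra_8_mmi is the same load/store wrapper around chroma_intra_body_mmi; the intra chroma filter has no tc0 clipping, just a fixed 3-tap smoothing under the alpha/beta gate. A scalar sketch, with the same illustrative-naming caveat as above:

    #include <stdint.h>
    #include <stdlib.h>

    static void chroma_intra_pair(uint8_t *p0, uint8_t p1,
                                  uint8_t *q0, uint8_t q1,
                                  int alpha, int beta)
    {
        if (abs(*p0 - *q0) >= alpha ||
            abs(p1 - *p0) >= beta || abs(q1 - *q0) >= beta)
            return;
        *p0 = (2*p1 + *p0 + q1 + 2) >> 2;  /* q0' does not read p0', */
        *q0 = (2*q1 + *q0 + p1 + 2) >> 2;  /* so in-place is safe    */
    }
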
1771 void ff_deblock_h_chroma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
1772  int8_t *tc0)
1773 {
1774  __asm__ volatile (
1775  "daddiu %[alpha], %[alpha], -0x1 \r\n"
1776  "daddiu %[beta], %[beta], -0x1 \r\n"
1777  "daddu $16, %[stride], %[stride] \r\n"
1778  "daddiu %[pix], %[pix], -0x2 \r\n"
1779  "daddu $17, $16, %[stride] \r\n"
1780  "daddu $19, $16, $16 \r\n"
1781  "or $18, $0, %[pix] \r\n"
1782  "daddu %[pix], %[pix], $17 \r\n"
1783  "gslwlc1 $f0, 0x3($18) \r\n"
1784  "daddu $12, $18, %[stride] \r\n"
1785  "gslwrc1 $f0, 0x0($18) \r\n"
1786  "gslwlc1 $f4, 0x3($12) \r\n"
1787  "daddu $13, $18, $16 \r\n"
1788  "gslwrc1 $f4, 0x0($12) \r\n"
1789  "gslwlc1 $f2, 0x3($13) \r\n"
1790  "gslwrc1 $f2, 0x0($13) \r\n"
1791  "gslwlc1 $f6, 0x3(%[pix]) \r\n"
1792  "gslwrc1 $f6, 0x0(%[pix]) \r\n"
1793  "punpcklbh $f0, $f0, $f4 \r\n"
1794  "punpcklbh $f2, $f2, $f6 \r\n"
1795  "daddu $12, %[pix], %[stride] \r\n"
1796  "punpckhhw $f4, $f0, $f2 \r\n"
1797  "punpcklhw $f0, $f0, $f2 \r\n"
1798  "gslwlc1 $f8, 0x3($12) \r\n"
1799  "daddu $13, %[pix], $16 \r\n"
1800  "gslwrc1 $f8, 0x0($12) \r\n"
1801  "gslwlc1 $f12, 0x3($13) \r\n"
1802  "daddu $12, %[pix], $17 \r\n"
1803  "gslwrc1 $f12, 0x0($13) \r\n"
1804  "gslwlc1 $f10, 0x3($12) \r\n"
1805  "daddu $13, %[pix], $19 \r\n"
1806  "gslwrc1 $f10, 0x0($12) \r\n"
1807  "gslwlc1 $f14, 0x3($13) \r\n"
1808  "gslwrc1 $f14, 0x0($13) \r\n"
1809  "punpcklbh $f8, $f8, $f12 \r\n"
1810  "punpcklbh $f10, $f10, $f14 \r\n"
1811  "mov.d $f12, $f8 \r\n"
1812  "punpcklhw $f8, $f8, $f10 \r\n"
1813  "punpckhhw $f12, $f12, $f10 \r\n"
1814  "punpckhwd $f2, $f0, $f8 \r\n"
1815  "punpckhwd $f6, $f4, $f12 \r\n"
1816  "punpcklwd $f0, $f0, $f8 \r\n"
1817  "punpcklwd $f4, $f4, $f12 \r\n"
1818  "mov.d $f20, $f0 \r\n"
1819  "mov.d $f22, $f6 \r\n"
1820  : [pix]"+r"(pix),[stride]"+r"(stride),[alpha]"+r"(alpha),
1821  [beta]"+r"(beta)
1822  ::"$12","$13","$16","$17","$18","$19","$f0","$f2","$f4","$f6","$f8",
1823  "$f10","$f12","$f14","$f20","$f22"
1824  );
1825 
1826  chroma_inter_body_mmi(pix, stride, alpha, beta, tc0);
1827 
1828  __asm__ volatile (
1829  "punpckhwd $f8, $f20, $f20 \r\n"
1830  "punpckhwd $f10, $f2, $f2 \r\n"
1831  "punpckhwd $f12, $f4, $f4 \r\n"
1832  "punpcklbh $f0, $f20, $f2 \r\n"
1833  "punpcklbh $f4, $f4, $f22 \r\n"
1834  "punpcklhw $f2, $f0, $f4 \r\n"
1835  "punpckhhw $f0, $f0, $f4 \r\n"
1836  "gsswlc1 $f2, 0x3($18) \r\n"
1837  "gsswrc1 $f2, 0x0($18) \r\n"
1838  "daddu $12, $18, %[stride] \r\n"
1839  "punpckhwd $f2, $f2, $f2 \r\n"
1840  "gsswlc1 $f2, 0x3($12) \r\n"
1841  "daddu $13, $18, $16 \r\n"
1842  "gsswrc1 $f2, 0x0($12) \r\n"
1843  "gsswlc1 $f0, 0x3($13) \r\n"
1844  "gsswrc1 $f0, 0x0($13) \r\n"
1845  "punpckhwd $f0, $f0, $f0 \r\n"
1846  "punpckhwd $f6, $f22, $f22 \r\n"
1847  "gsswlc1 $f0, 0x3(%[pix]) \r\n"
1848  "gsswrc1 $f0, 0x0(%[pix]) \r\n"
1849  "punpcklbh $f8, $f8, $f10 \r\n"
1850  "punpcklbh $f12, $f12, $f6 \r\n"
1851  "daddu $12, %[pix], %[stride] \r\n"
1852  "punpcklhw $f10, $f8, $f12 \r\n"
1853  "punpckhhw $f8, $f8, $f12 \r\n"
1854  "gsswlc1 $f10, 0x3($12) \r\n"
1855  "gsswrc1 $f10, 0x0($12) \r\n"
1856  "punpckhwd $f10, $f10, $f10 \r\n"
1857  "daddu $12, %[pix], $16 \r\n"
1858  "daddu $13, %[pix], $17 \r\n"
1859  "gsswlc1 $f10, 0x3($12) \r\n"
1860  "gsswrc1 $f10, 0x0($12) \r\n"
1861  "gsswlc1 $f8, 0x3($13) \r\n"
1862  "daddu $12, %[pix], $19 \r\n"
1863  "punpckhwd $f20, $f8, $f8 \r\n"
1864  "gsswrc1 $f8, 0x0($13) \r\n"
1865  "gsswlc1 $f20, 0x3($12) \r\n"
1866  "gsswrc1 $f20, 0x0($12) \r\n"
1867  ::[pix]"r"(pix),[stride]"r"((int64_t)stride)
1868  : "$12","$13","$16","$17","$18","$19","$f0","$f2","$f4","$f6","$f8",
1869  "$f10","$f12","$f20"
1870  );
1871 }
1872 
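ff_deblock_h_chroma_8_mmi above and its intra twin below filter across a vertical edge, so the four taps (p1 p0 q0 q1) sit side by side within each row. The asm gathers 4 bytes per row from 8 rows starting at pix-2, transposes with punpcklbh/punpcklhw so each FP register holds one tap position for all rows, reuses the vertical filter bodies, then transposes back for the store. The gather, in scalar form (hypothetical helper):

    #include <stdint.h>

    /* 4 bytes per row from 8 rows become 4 arrays of 8 samples,
     * one array per filter tap position; pix points at the q0 column. */
    static void gather_4x8(uint8_t out[4][8], const uint8_t *pix,
                           int stride)
    {
        for (int row = 0; row < 8; row++)
            for (int tap = 0; tap < 4; tap++)       /* p1 p0 q0 q1 */
                out[tap][row] = pix[row * stride + tap - 2];
    }
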
1873 void ff_deblock_h_chroma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
1874  int beta)
1875 {
1876  __asm__ volatile (
1877  "daddiu %[alpha], %[alpha], -0x1 \r\n"
1878  "daddiu %[beta], %[beta], -0x1 \r\n"
1879  "daddu $16, %[stride], %[stride] \r\n"
1880  "daddiu %[pix], %[pix], -0x2 \r\n"
1881  "daddu $17, $16, %[stride] \r\n"
1882  "daddu $19, $16, $16 \r\n"
1883  "or $18, $0, %[pix] \r\n"
1884  "daddu %[pix], %[pix], $17 \r\n"
1885  "gslwlc1 $f0, 0x3($18) \r\n"
1886  "daddu $12, $18, %[stride] \r\n"
1887  "gslwrc1 $f0, 0x0($18) \r\n"
1888  "gslwlc1 $f4, 0x3($12) \r\n"
1889  "daddu $13, $18, $16 \r\n"
1890  "gslwrc1 $f4, 0x0($12) \r\n"
1891  "gslwlc1 $f2, 0x3($13) \r\n"
1892  "gslwrc1 $f2, 0x0($13) \r\n"
1893  "gslwlc1 $f6, 0x3(%[pix]) \r\n"
1894  "gslwrc1 $f6, 0x0(%[pix]) \r\n"
1895  "punpcklbh $f0, $f0, $f4 \r\n"
1896  "punpcklbh $f2, $f2, $f6 \r\n"
1897  "daddu $12, %[pix], %[stride] \r\n"
1898  "punpckhhw $f4, $f0, $f2 \r\n"
1899  "punpcklhw $f0, $f0, $f2 \r\n"
1900  "gslwlc1 $f8, 0x3($12) \r\n"
1901  "daddu $13, %[pix], $16 \r\n"
1902  "gslwrc1 $f8, 0x0($12) \r\n"
1903  "gslwlc1 $f12, 0x3($13) \r\n"
1904  "daddu $12, %[pix], $17 \r\n"
1905  "gslwrc1 $f12, 0x0($13) \r\n"
1906  "gslwlc1 $f10, 0x3($12) \r\n"
1907  "daddu $13, %[pix], $19 \r\n"
1908  "gslwrc1 $f10, 0x0($12) \r\n"
1909  "gslwlc1 $f14, 0x3($13) \r\n"
1910  "gslwrc1 $f14, 0x0($13) \r\n"
1911  "punpcklbh $f8, $f8, $f12 \r\n"
1912  "punpcklbh $f10, $f10, $f14 \r\n"
1913  "mov.d $f12, $f8 \r\n"
1914  "punpcklhw $f8, $f8, $f10 \r\n"
1915  "punpckhhw $f12, $f12, $f10 \r\n"
1916  "punpckhwd $f2, $f0, $f8 \r\n"
1917  "punpckhwd $f6, $f4, $f12 \r\n"
1918  "punpcklwd $f0, $f0, $f8 \r\n"
1919  "punpcklwd $f4, $f4, $f12 \r\n"
1920  : [pix]"+r"(pix),[stride]"+r"(stride),[alpha]"+r"(alpha),
1921  [beta]"+r"(beta)
1922  ::"$12","$13","$16","$17","$18","$19","$f0","$f2","$f4","$f6","$f8",
1923  "$f10","$f12","$f14","$f20","$f22"
1924  );
1925 
1926  chroma_intra_body_mmi(pix, stride, alpha, beta);
1927 
1928  __asm__ volatile (
1929  "punpckhwd $f8, $f0, $f0 \r\n"
1930  "punpckhwd $f10, $f2, $f2 \r\n"
1931  "punpckhwd $f12, $f4, $f4 \r\n"
1932  "punpcklbh $f0, $f0, $f2 \r\n"
1933  "punpcklbh $f4, $f4, $f6 \r\n"
1934  "punpcklhw $f2, $f0, $f4 \r\n"
1935  "punpckhhw $f0, $f0, $f4 \r\n"
1936  "gsswlc1 $f2, 0x3($18) \r\n"
1937  "gsswrc1 $f2, 0x0($18) \r\n"
1938  "daddu $12, $18, %[stride] \r\n"
1939  "punpckhwd $f2, $f2, $f2 \r\n"
1940  "gsswlc1 $f2, 0x3($12) \r\n"
1941  "daddu $13, $18, $16 \r\n"
1942  "gsswrc1 $f2, 0x0($12) \r\n"
1943  "gsswlc1 $f0, 0x3($13) \r\n"
1944  "gsswrc1 $f0, 0x0($13) \r\n"
1945  "punpckhwd $f0, $f0, $f0 \r\n"
1946  "punpckhwd $f6, $f6, $f6 \r\n"
1947  "gsswlc1 $f0, 0x3(%[pix]) \r\n"
1948  "gsswrc1 $f0, 0x0(%[pix]) \r\n"
1949  "punpcklbh $f8, $f8, $f10 \r\n"
1950  "punpcklbh $f12, $f12, $f6 \r\n"
1951  "daddu $12, %[pix], %[stride] \r\n"
1952  "punpcklhw $f10, $f8, $f12 \r\n"
1953  "punpckhhw $f8, $f8, $f12 \r\n"
1954  "gsswlc1 $f10, 0x3($12) \r\n"
1955  "gsswrc1 $f10, 0x0($12) \r\n"
1956  "punpckhwd $f10, $f10, $f10 \r\n"
1957  "daddu $12, %[pix], $16 \r\n"
1958  "daddu $13, %[pix], $17 \r\n"
1959  "gsswlc1 $f10, 0x3($12) \r\n"
1960  "gsswrc1 $f10, 0x0($12) \r\n"
1961  "gsswlc1 $f8, 0x3($13) \r\n"
1962  "daddu $12, %[pix], $19 \r\n"
1963  "punpckhwd $f20, $f8, $f8 \r\n"
1964  "gsswrc1 $f8, 0x0($13) \r\n"
1965  "gsswlc1 $f20, 0x3($12) \r\n"
1966  "gsswrc1 $f20, 0x0($12) \r\n"
1967  ::[pix]"r"(pix),[stride]"r"((int64_t)stride)
1968  : "$12","$13","$16","$17","$18","$19","$f0","$f2","$f4","$f6","$f8",
1969  "$f10","$f12","$f20"
1970  );
1971 }
1972 
1973 void ff_deblock_v_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
1974  int8_t *tc0)
1975 {
1976  if ((tc0[0] & tc0[1]) >= 0)
1977  ff_deblock_v8_luma_8_mmi(pix + 0, stride, alpha, beta, tc0);
1978  if ((tc0[2] & tc0[3]) >= 0)
1979  ff_deblock_v8_luma_8_mmi(pix + 8, stride, alpha, beta, tc0 + 2);
1980 }
1981 
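ff_deblock_v_luma_8_mmi splits the 16-pixel-wide luma edge into two 8-pixel halves. tc0 carries one value per 4-pixel segment, with -1 meaning "do not filter"; the bitwise AND of two signed bytes is negative exactly when both are negative, so (tc0[0] & tc0[1]) >= 0 is a branch-free test for "at least one segment of this half is enabled". The intra wrapper below has no tc0 and always filters both halves. A tiny check of the trick:

    #include <assert.h>
    #include <stdint.h>

    /* (a & b) < 0 iff the sign bit is set in both operands */
    static int half_enabled(int8_t a, int8_t b) { return (a & b) >= 0; }

    int main(void)
    {
        assert(half_enabled(-1, -1) == 0);  /* both segments off: skip */
        assert(half_enabled(-1,  2) == 1);  /* one segment on: filter  */
        assert(half_enabled( 0,  0) == 1);  /* tc0 == 0 still filters  */
        return 0;
    }
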
1982 void ff_deblock_v_luma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
1983  int beta)
1984 {
1985  ff_deblock_v8_luma_intra_8_mmi(pix + 0, stride, alpha, beta);
1986  ff_deblock_v8_luma_intra_8_mmi(pix + 8, stride, alpha, beta);
1987 }
1988 
1989 void ff_deblock_h_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
1990  int8_t *tc0)
1991 {
1992  uint64_t stack[0xd];
1993 
1994  __asm__ volatile (
1995  "daddu $15, %[stride], %[stride] \r\n"
1996  "daddiu $8, %[pix], -0x4 \r\n"
1997  "daddu $9, %[stride], $15 \r\n"
1998  "gsldlc1 $f0, 0x7($8) \r\n"
1999  "gsldrc1 $f0, 0x0($8) \r\n"
2000  "daddu $12, $8, %[stride] \r\n"
2001  "daddu $10, $8, $9 \r\n"
2002  "gsldlc1 $f2, 0x7($12) \r\n"
2003  "daddu $11, $8, $15 \r\n"
2004  "gsldrc1 $f2, 0x0($12) \r\n"
2005  "gsldlc1 $f4, 0x7($11) \r\n"
2006  "gsldrc1 $f4, 0x0($11) \r\n"
2007  "gsldlc1 $f6, 0x7($10) \r\n"
2008  "daddu $12, $10, %[stride] \r\n"
2009  "gsldrc1 $f6, 0x0($10) \r\n"
2010  "gsldlc1 $f8, 0x7($12) \r\n"
2011  "daddu $11, $10, $15 \r\n"
2012  "gsldrc1 $f8, 0x0($12) \r\n"
2013  "gsldlc1 $f10, 0x7($11) \r\n"
2014  "daddu $12, $10, $9 \r\n"
2015  "gsldrc1 $f10, 0x0($11) \r\n"
2016  "gsldlc1 $f12, 0x7($12) \r\n"
2017  "gsldrc1 $f12, 0x0($12) \r\n"
2018  "daddu $14, $15, $15 \r\n"
2019  "punpckhbh $f14, $f0, $f2 \r\n"
2020  "punpcklbh $f0, $f0, $f2 \r\n"
2021  "punpckhbh $f2, $f4, $f6 \r\n"
2022  "punpcklbh $f4, $f4, $f6 \r\n"
2023  "punpckhbh $f6, $f8, $f10 \r\n"
2024  "punpcklbh $f8, $f8, $f10 \r\n"
2025  "daddu $12, $10, $14 \r\n"
2026  "sdc1 $f2, 0x10+%[stack] \r\n"
2027  "gsldlc1 $f16, 0x7($12) \r\n"
2028  "gsldrc1 $f16, 0x0($12) \r\n"
2029  "daddu $13, $14, $14 \r\n"
2030  "punpckhbh $f10, $f12, $f16 \r\n"
2031  "punpcklbh $f12, $f12, $f16 \r\n"
2032  "punpckhhw $f2, $f0, $f4 \r\n"
2033  "punpcklhw $f0, $f0, $f4 \r\n"
2034  "punpckhhw $f4, $f8, $f12 \r\n"
2035  "punpcklhw $f8, $f8, $f12 \r\n"
2036  "ldc1 $f16, 0x10+%[stack] \r\n"
2037  "punpckhwd $f0, $f0, $f8 \r\n"
2038  "sdc1 $f0, 0x0+%[stack] \r\n"
2039  "punpckhhw $f12, $f14, $f16 \r\n"
2040  "punpcklhw $f14, $f14, $f16 \r\n"
2041  "punpckhhw $f0, $f6, $f10 \r\n"
2042  "punpcklhw $f6, $f6, $f10 \r\n"
2043  "punpcklwd $f12, $f12, $f0 \r\n"
2044  "punpckhwd $f10, $f14, $f6 \r\n"
2045  "punpcklwd $f14, $f14, $f6 \r\n"
2046  "punpckhwd $f6, $f2, $f4 \r\n"
2047  "punpcklwd $f2, $f2, $f4 \r\n"
2048  "sdc1 $f2, 0x10+%[stack] \r\n"
2049  "sdc1 $f6, 0x20+%[stack] \r\n"
2050  "sdc1 $f14, 0x30+%[stack] \r\n"
2051  "sdc1 $f10, 0x40+%[stack] \r\n"
2052  "sdc1 $f12, 0x50+%[stack] \r\n"
2053  "daddu $8, $8, $13 \r\n"
2054  "daddu $10, $10, $13 \r\n"
2055  "gsldlc1 $f0, 0x7($8) \r\n"
2056  "daddu $12, $8, %[stride] \r\n"
2057  "gsldrc1 $f0, 0x0($8) \r\n"
2058  "gsldlc1 $f2, 0x7($12) \r\n"
2059  "daddu $11, $8, $15 \r\n"
2060  "gsldrc1 $f2, 0x0($12) \r\n"
2061  "gsldlc1 $f4, 0x7($11) \r\n"
2062  "gsldrc1 $f4, 0x0($11) \r\n"
2063  "gsldlc1 $f6, 0x7($10) \r\n"
2064  "daddu $12, $10, %[stride] \r\n"
2065  "gsldrc1 $f6, 0x0($10) \r\n"
2066  "gsldlc1 $f8, 0x7($12) \r\n"
2067  "daddu $11, $10, $15 \r\n"
2068  "gsldrc1 $f8, 0x0($12) \r\n"
2069  "gsldlc1 $f10, 0x7($11) \r\n"
2070  "daddu $12, $10, $9 \r\n"
2071  "gsldrc1 $f10, 0x0($11) \r\n"
2072  "gsldlc1 $f12, 0x7($12) \r\n"
2073  "gsldrc1 $f12, 0x0($12) \r\n"
2074  "punpckhbh $f14, $f0, $f2 \r\n"
2075  "punpcklbh $f0, $f0, $f2 \r\n"
2076  "punpckhbh $f2, $f4, $f6 \r\n"
2077  "punpcklbh $f4, $f4, $f6 \r\n"
2078  "punpckhbh $f6, $f8, $f10 \r\n"
2079  "punpcklbh $f8, $f8, $f10 \r\n"
2080  "daddu $12, $10, $14 \r\n"
2081  "sdc1 $f2, 0x18+%[stack] \r\n"
2082  "gsldlc1 $f16, 0x7($12) \r\n"
2083  "gsldrc1 $f16, 0x0($12) \r\n"
2084  "punpckhhw $f2, $f0, $f4 \r\n"
2085  "punpckhbh $f10, $f12, $f16 \r\n"
2086  "punpcklbh $f12, $f12, $f16 \r\n"
2087  "punpcklhw $f0, $f0, $f4 \r\n"
2088  "punpckhhw $f4, $f8, $f12 \r\n"
2089  "punpcklhw $f8, $f8, $f12 \r\n"
2090  "punpckhwd $f0, $f0, $f8 \r\n"
2091  "ldc1 $f16, 0x18+%[stack] \r\n"
2092  "sdc1 $f0, 0x8+%[stack] \r\n"
2093  "punpckhhw $f12, $f14, $f16 \r\n"
2094  "punpcklhw $f14, $f14, $f16 \r\n"
2095  "punpckhhw $f0, $f6, $f10 \r\n"
2096  "punpcklhw $f6, $f6, $f10 \r\n"
2097  "punpckhwd $f10, $f14, $f6 \r\n"
2098  "punpcklwd $f14, $f14, $f6 \r\n"
2099  "punpckhwd $f6, $f2, $f4 \r\n"
2100  "punpcklwd $f2, $f2, $f4 \r\n"
2101  "punpcklwd $f12, $f12, $f0 \r\n"
2102  "sdc1 $f2, 0x18+%[stack] \r\n"
2103  "sdc1 $f6, 0x28+%[stack] \r\n"
2104  "sdc1 $f14, 0x38+%[stack] \r\n"
2105  "sdc1 $f10, 0x48+%[stack] \r\n"
2106  "sdc1 $f12, 0x58+%[stack] \r\n"
2107  ::[pix]"r"(pix),[stride]"r"((int64_t)stride),[stack]"m"(stack[0])
2108  : "$8","$9","$10","$11","$12","$13","$14","$15","$f0","$f2","$f4",
2109  "$f6","$f8","$f10","$f12","$f14","$f16"
2110  );
2111 
2112  ff_deblock_v_luma_8_mmi((uint8_t *) &stack[6], 0x10, alpha, beta, tc0);
2113 
2114  __asm__ volatile (
2115  "daddu $15, %[stride], %[stride] \r\n"
2116  "daddiu $8, %[pix], -0x2 \r\n"
2117  "daddu $14, $15, $15 \r\n"
2118  "daddu $9, $15, %[stride] \r\n"
2119  "daddu $13, $14, $14 \r\n"
2120  "daddu $10, $8, $9 \r\n"
2121  "ldc1 $f0, 0x10+%[stack] \r\n"
2122  "ldc1 $f2, 0x20+%[stack] \r\n"
2123  "ldc1 $f4, 0x30+%[stack] \r\n"
2124  "ldc1 $f6, 0x40+%[stack] \r\n"
2125  "punpckhwd $f8, $f0, $f0 \r\n"
2126  "punpckhwd $f10, $f2, $f2 \r\n"
2127  "punpckhwd $f12, $f4, $f4 \r\n"
2128  "punpcklbh $f0, $f0, $f2 \r\n"
2129  "punpcklbh $f4, $f4, $f6 \r\n"
2130  "punpcklhw $f2, $f0, $f4 \r\n"
2131  "punpckhhw $f0, $f0, $f4 \r\n"
2132  "gsswlc1 $f2, 0x3($8) \r\n"
2133  "gsswrc1 $f2, 0x0($8) \r\n"
2134  "daddu $12, $8, %[stride] \r\n"
2135  "punpckhwd $f2, $f2, $f2 \r\n"
2136  "daddu $11, $8, $15 \r\n"
2137  "gsswlc1 $f2, 0x3($12) \r\n"
2138  "gsswrc1 $f2, 0x0($12) \r\n"
2139  "gsswlc1 $f0, 0x3($11) \r\n"
2140  "gsswrc1 $f0, 0x0($11) \r\n"
2141  "punpckhwd $f0, $f0, $f0 \r\n"
2142  "punpckhwd $f6, $f6, $f6 \r\n"
2143  "gsswlc1 $f0, 0x3($10) \r\n"
2144  "gsswrc1 $f0, 0x0($10) \r\n"
2145  "punpcklbh $f8, $f8, $f10 \r\n"
2146  "punpcklbh $f12, $f12, $f6 \r\n"
2147  "punpcklhw $f10, $f8, $f12 \r\n"
2148  "daddu $12, $10, %[stride] \r\n"
2149  "punpckhhw $f8, $f8, $f12 \r\n"
2150  "gsswlc1 $f10, 0x3($12) \r\n"
2151  "gsswrc1 $f10, 0x0($12) \r\n"
2152  "daddu $12, $10, $15 \r\n"
2153  "punpckhwd $f10, $f10, $f10 \r\n"
2154  "daddu $11, $10, $9 \r\n"
2155  "gsswlc1 $f10, 0x3($12) \r\n"
2156  "gsswrc1 $f10, 0x0($12) \r\n"
2157  "gsswlc1 $f8, 0x3($11) \r\n"
2158  "gsswrc1 $f8, 0x0($11) \r\n"
2159  "daddu $12, $10, $14 \r\n"
2160  "punpckhwd $f8, $f8, $f8 \r\n"
2161  "daddu $8, $8, $13 \r\n"
2162  "gsswlc1 $f8, 0x3($12) \r\n"
2163  "gsswrc1 $f8, 0x0($12) \r\n"
2164  "daddu $10, $10, $13 \r\n"
2165  "ldc1 $f0, 0x18+%[stack] \r\n"
2166  "ldc1 $f2, 0x28+%[stack] \r\n"
2167  "ldc1 $f4, 0x38+%[stack] \r\n"
2168  "ldc1 $f6, 0x48+%[stack] \r\n"
2169  "daddu $15, %[stride], %[stride] \r\n"
2170  "punpckhwd $f8, $f0, $f0 \r\n"
2171  "daddu $14, $15, $15 \r\n"
2172  "punpckhwd $f10, $f2, $f2 \r\n"
2173  "punpckhwd $f12, $f4, $f4 \r\n"
2174  "punpcklbh $f0, $f0, $f2 \r\n"
2175  "punpcklbh $f4, $f4, $f6 \r\n"
2176  "daddu $12, $8, %[stride] \r\n"
2177  "punpcklhw $f2, $f0, $f4 \r\n"
2178  "punpckhhw $f0, $f0, $f4 \r\n"
2179  "gsswlc1 $f2, 0x3($8) \r\n"
2180  "gsswrc1 $f2, 0x0($8) \r\n"
2181  "punpckhwd $f2, $f2, $f2 \r\n"
2182  "daddu $11, $8, $15 \r\n"
2183  "gsswlc1 $f2, 0x3($12) \r\n"
2184  "gsswrc1 $f2, 0x0($12) \r\n"
2185  "gsswlc1 $f0, 0x3($11) \r\n"
2186  "gsswrc1 $f0, 0x0($11) \r\n"
2187  "punpckhwd $f0, $f0, $f0 \r\n"
2188  "punpckhwd $f6, $f6, $f6 \r\n"
2189  "gsswlc1 $f0, 0x3($10) \r\n"
2190  "gsswrc1 $f0, 0x0($10) \r\n"
2191  "punpcklbh $f8, $f8, $f10 \r\n"
2192  "punpcklbh $f12, $f12, $f6 \r\n"
2193  "daddu $12, $10, %[stride] \r\n"
2194  "punpcklhw $f10, $f8, $f12 \r\n"
2195  "punpckhhw $f8, $f8, $f12 \r\n"
2196  "gsswlc1 $f10, 0x3($12) \r\n"
2197  "gsswrc1 $f10, 0x0($12) \r\n"
2198  "daddu $12, $10, $15 \r\n"
2199  "punpckhwd $f10, $f10, $f10 \r\n"
2200  "daddu $11, $10, $9 \r\n"
2201  "gsswlc1 $f10, 0x3($12) \r\n"
2202  "gsswrc1 $f10, 0x0($12) \r\n"
2203  "gsswlc1 $f8, 0x3($11) \r\n"
2204  "gsswrc1 $f8, 0x0($11) \r\n"
2205  "daddu $12, $10, $14 \r\n"
2206  "punpckhwd $f8, $f8, $f8 \r\n"
2207  "gsswlc1 $f8, 0x3($12) \r\n"
2208  "gsswrc1 $f8, 0x0($12) \r\n"
2209  ::[pix]"r"(pix),[stride]"r"((int64_t)stride),[stack]"m"(stack[0])
2210  : "$8","$9","$10","$11","$12","$13","$14","$15","$f0","$f2","$f4",
2211  "$f6","$f8","$f10","$f12","$f14","$f16"
2212  );
2213 }
2214 
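ff_deblock_h_luma_8_mmi above avoids a second filter implementation: it transposes the sample columns around the vertical edge (pix-4 .. pix+3, 16 rows) into the on-stack buffer as six 16-byte-pitch rows (p2..q2, the only taps the bS<4 filter touches), calls ff_deblock_v_luma_8_mmi on the copy with stride 0x10 and &stack[6] pointing at the q0 row, then transposes the result back. ff_deblock_h_luma_intra_8_mmi below does the same through ptmp, keeping all eight taps because the strong filter also reads p3/q3. The shape of the trick, as a hedged sketch (the helper and layout are illustrative; the real transposes are the punpck* sequences):

    #include <stdint.h>

    static void deblock_h_via_v(uint8_t *pix, int stride, int alpha,
                                int beta, int8_t *tc0,
                                void (*v_filter)(uint8_t *, int, int, int,
                                                 int8_t *))
    {
        uint8_t buf[6][16];                     /* p2..q2 as rows */

        for (int row = 0; row < 16; row++)      /* columns -> rows */
            for (int tap = 0; tap < 6; tap++)
                buf[tap][row] = pix[row * stride + tap - 3];

        v_filter(&buf[3][0], 16, alpha, beta, tc0);  /* buf[3] = q0 row */

        for (int row = 0; row < 16; row++)      /* rows -> columns */
            for (int tap = 0; tap < 6; tap++)
                pix[row * stride + tap - 3] = buf[tap][row];
    }
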
2215 void ff_deblock_h_luma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
2216  int beta)
2217 {
2218  uint64_t ptmp[0x11];
2219  uint64_t pdat[4];
2220 
2221  __asm__ volatile (
2222  "daddu $12, %[stride], %[stride] \r\n"
2223  "daddiu $10, %[pix], -0x4 \r\n"
2224  "daddu $11, $12, %[stride] \r\n"
2225  "daddu $13, $12, $12 \r\n"
2226  "daddu $9, $10, $11 \r\n"
2227  "daddu $8, $10, %[stride] \r\n"
2228  "gsldlc1 $f0, 0x7($10) \r\n"
2229  "gsldrc1 $f0, 0x0($10) \r\n"
2230  "daddu $14, $10, $12 \r\n"
2231  "gsldlc1 $f2, 0x7($8) \r\n"
2232  "gsldrc1 $f2, 0x0($8) \r\n"
2233  "gsldlc1 $f4, 0x7($14) \r\n"
2234  "gsldrc1 $f4, 0x0($14) \r\n"
2235  "daddu $8, $9, %[stride] \r\n"
2236  "gsldlc1 $f6, 0x7($9) \r\n"
2237  "gsldrc1 $f6, 0x0($9) \r\n"
2238  "daddu $14, $9, $12 \r\n"
2239  "gsldlc1 $f8, 0x7($8) \r\n"
2240  "gsldrc1 $f8, 0x0($8) \r\n"
2241  "daddu $8, $9, $11 \r\n"
2242  "gsldlc1 $f10, 0x7($14) \r\n"
2243  "gsldrc1 $f10, 0x0($14) \r\n"
2244  "gsldlc1 $f12, 0x7($8) \r\n"
2245  "gsldrc1 $f12, 0x0($8) \r\n"
2246  "daddu $8, $9, $13 \r\n"
2247  "punpckhbh $f14, $f0, $f2 \r\n"
2248  "punpcklbh $f0, $f0, $f2 \r\n"
2249  "punpckhbh $f2, $f4, $f6 \r\n"
2250  "punpcklbh $f4, $f4, $f6 \r\n"
2251  "punpckhbh $f6, $f8, $f10 \r\n"
2252  "punpcklbh $f8, $f8, $f10 \r\n"
2253  "gsldlc1 $f16, 0x7($8) \r\n"
2254  "gsldrc1 $f16, 0x0($8) \r\n"
2255  "punpckhbh $f10, $f12, $f16 \r\n"
2256  "punpcklbh $f12, $f12, $f16 \r\n"
2257  "sdc1 $f6, 0x0+%[ptmp] \r\n"
2258  "punpckhhw $f6, $f0, $f4 \r\n"
2259  "punpcklhw $f0, $f0, $f4 \r\n"
2260  "punpckhhw $f4, $f8, $f12 \r\n"
2261  "punpcklhw $f8, $f8, $f12 \r\n"
2262  "punpckhhw $f12, $f14, $f2 \r\n"
2263  "punpcklhw $f14, $f14, $f2 \r\n"
2264  "sdc1 $f4, 0x20+%[ptmp] \r\n"
2265  "ldc1 $f4, 0x0+%[ptmp] \r\n"
2266  "punpckhhw $f2, $f4, $f10 \r\n"
2267  "punpcklhw $f4, $f4, $f10 \r\n"
2268  "punpckhwd $f10, $f0, $f8 \r\n"
2269  "punpcklwd $f0, $f0, $f8 \r\n"
2270  "punpckhwd $f8, $f14, $f4 \r\n"
2271  "punpcklwd $f14, $f14, $f4 \r\n"
2272  "sdc1 $f0, 0x0+%[ptmp] \r\n"
2273  "sdc1 $f10, 0x10+%[ptmp] \r\n"
2274  "sdc1 $f14, 0x40+%[ptmp] \r\n"
2275  "sdc1 $f8, 0x50+%[ptmp] \r\n"
2276  "ldc1 $f16, 0x20+%[ptmp] \r\n"
2277  "punpckhwd $f0, $f6, $f16 \r\n"
2278  "punpcklwd $f6, $f6, $f16 \r\n"
2279  "punpckhwd $f10, $f12, $f2 \r\n"
2280  "punpcklwd $f12, $f12, $f2 \r\n"
2281  "daddu $8, $13, $13 \r\n"
2282  "sdc1 $f6, 0x20+%[ptmp] \r\n"
2283  "sdc1 $f0, 0x30+%[ptmp] \r\n"
2284  "sdc1 $f12, 0x60+%[ptmp] \r\n"
2285  "sdc1 $f10, 0x70+%[ptmp] \r\n"
2286  "daddu $10, $10, $8 \r\n"
2287  "daddu $9, $9, $8 \r\n"
2288  "daddu $8, $10, %[stride] \r\n"
2289  "gsldlc1 $f0, 0x7($10) \r\n"
2290  "gsldrc1 $f0, 0x0($10) \r\n"
2291  "daddu $14, $10, $12 \r\n"
2292  "gsldlc1 $f2, 0x7($8) \r\n"
2293  "gsldrc1 $f2, 0x0($8) \r\n"
2294  "gsldlc1 $f4, 0x7($14) \r\n"
2295  "gsldrc1 $f4, 0x0($14) \r\n"
2296  "daddu $8, $9, %[stride] \r\n"
2297  "gsldlc1 $f6, 0x7($9) \r\n"
2298  "gsldrc1 $f6, 0x0($9) \r\n"
2299  "daddu $14, $9, $12 \r\n"
2300  "gsldlc1 $f8, 0x7($8) \r\n"
2301  "gsldrc1 $f8, 0x0($8) \r\n"
2302  "daddu $8, $9, $11 \r\n"
2303  "gsldlc1 $f10, 0x7($14) \r\n"
2304  "gsldrc1 $f10, 0x0($14) \r\n"
2305  "gsldlc1 $f12, 0x7($8) \r\n"
2306  "gsldrc1 $f12, 0x0($8) \r\n"
2307  "daddu $8, $9, $13 \r\n"
2308  "punpckhbh $f14, $f0, $f2 \r\n"
2309  "punpcklbh $f0, $f0, $f2 \r\n"
2310  "punpckhbh $f2, $f4, $f6 \r\n"
2311  "punpcklbh $f4, $f4, $f6 \r\n"
2312  "punpckhbh $f6, $f8, $f10 \r\n"
2313  "punpcklbh $f8, $f8, $f10 \r\n"
2314  "gsldlc1 $f16, 0x7($8) \r\n"
2315  "gsldrc1 $f16, 0x0($8) \r\n"
2316  "punpckhbh $f10, $f12, $f16 \r\n"
2317  "punpcklbh $f12, $f12, $f16 \r\n"
2318  "sdc1 $f6, 0x8+%[ptmp] \r\n"
2319  "punpckhhw $f6, $f0, $f4 \r\n"
2320  "punpcklhw $f0, $f0, $f4 \r\n"
2321  "punpckhhw $f4, $f8, $f12 \r\n"
2322  "punpcklhw $f8, $f8, $f12 \r\n"
2323  "punpckhhw $f12, $f14, $f2 \r\n"
2324  "punpcklhw $f14, $f14, $f2 \r\n"
2325  "sdc1 $f4, 0x28+%[ptmp] \r\n"
2326  "ldc1 $f4, 0x8+%[ptmp] \r\n"
2327  "punpckhhw $f2, $f4, $f10 \r\n"
2328  "punpcklhw $f4, $f4, $f10 \r\n"
2329  "punpckhwd $f10, $f0, $f8 \r\n"
2330  "punpcklwd $f0, $f0, $f8 \r\n"
2331  "punpckhwd $f8, $f14, $f4 \r\n"
2332  "punpcklwd $f14, $f14, $f4 \r\n"
2333  "sdc1 $f0, 0x8+%[ptmp] \r\n"
2334  "sdc1 $f10, 0x18+%[ptmp] \r\n"
2335  "sdc1 $f14, 0x48+%[ptmp] \r\n"
2336  "sdc1 $f8, 0x58+%[ptmp] \r\n"
2337  "ldc1 $f16, 0x28+%[ptmp] \r\n"
2338  "punpckhwd $f0, $f6, $f16 \r\n"
2339  "punpcklwd $f6, $f6, $f16 \r\n"
2340  "punpckhwd $f10, $f12, $f2 \r\n"
2341  "punpcklwd $f12, $f12, $f2 \r\n"
2342  "sdc1 $f6, 0x28+%[ptmp] \r\n"
2343  "sdc1 $f0, 0x38+%[ptmp] \r\n"
2344  "sdc1 $f12, 0x68+%[ptmp] \r\n"
2345  "sdc1 $f10, 0x78+%[ptmp] \r\n"
2346  "sd $10, 0x00+%[pdat] \r\n"
2347  "sd $11, 0x08+%[pdat] \r\n"
2348  "sd $12, 0x10+%[pdat] \r\n"
2349  "sd $13, 0x18+%[pdat] \r\n"
2350  ::[pix]"r"(pix),[stride]"r"((uint64_t)stride),[ptmp]"m"(ptmp[0]),
2351  [pdat]"m"(pdat[0])
2352  : "$8","$9","$10","$11","$12","$13","$14","$f0","$f2","$f4","$f6",
2353  "$f8","$f10","$f12","$f14","$f16"
2354  );
2355 
2356  ff_deblock_v_luma_intra_8_mmi((uint8_t *) &ptmp[8], 0x10, alpha, beta);
2357 
2358  __asm__ volatile (
2359  "ld $10, 0x00+%[pdat] \r\n"
2360  "ld $11, 0x08+%[pdat] \r\n"
2361  "ld $12, 0x10+%[pdat] \r\n"
2362  "ld $13, 0x18+%[pdat] \r\n"
2363  "daddu $9, $10, $11 \r\n"
2364  "ldc1 $f0, 0x8+%[ptmp] \r\n"
2365  "ldc1 $f2, 0x18+%[ptmp] \r\n"
2366  "ldc1 $f4, 0x28+%[ptmp] \r\n"
2367  "ldc1 $f6, 0x38+%[ptmp] \r\n"
2368  "ldc1 $f8, 0x48+%[ptmp] \r\n"
2369  "ldc1 $f10, 0x58+%[ptmp] \r\n"
2370  "ldc1 $f12, 0x68+%[ptmp] \r\n"
2371  "punpckhbh $f14, $f0, $f2 \r\n"
2372  "punpcklbh $f0, $f0, $f2 \r\n"
2373  "punpckhbh $f2, $f4, $f6 \r\n"
2374  "punpcklbh $f4, $f4, $f6 \r\n"
2375  "punpckhbh $f6, $f8, $f10 \r\n"
2376  "punpcklbh $f8, $f8, $f10 \r\n"
2377  "ldc1 $f16, 0x78+%[ptmp] \r\n"
2378  "punpckhbh $f10, $f12, $f16 \r\n"
2379  "punpcklbh $f12, $f12, $f16 \r\n"
2380  "gssdlc1 $f6, 0x7($10) \r\n"
2381  "gssdrc1 $f6, 0x0($10) \r\n"
2382  "daddu $8, $10, $12 \r\n"
2383  "punpckhhw $f6, $f0, $f4 \r\n"
2384  "punpcklhw $f0, $f0, $f4 \r\n"
2385  "punpckhhw $f4, $f8, $f12 \r\n"
2386  "punpcklhw $f8, $f8, $f12 \r\n"
2387  "punpckhhw $f12, $f14, $f2 \r\n"
2388  "punpcklhw $f14, $f14, $f2 \r\n"
2389  "gssdlc1 $f4, 0x7($8) \r\n"
2390  "gssdrc1 $f4, 0x0($8) \r\n"
2391  "gsldlc1 $f4, 0x7($10) \r\n"
2392  "gsldrc1 $f4, 0x0($10) \r\n"
2393  "punpckhhw $f2, $f4, $f10 \r\n"
2394  "punpcklhw $f4, $f4, $f10 \r\n"
2395  "punpckhwd $f10, $f0, $f8 \r\n"
2396  "punpcklwd $f0, $f0, $f8 \r\n"
2397  "punpckhwd $f8, $f14, $f4 \r\n"
2398  "punpcklwd $f14, $f14, $f4 \r\n"
2399  "daddu $8, $10, %[stride] \r\n"
2400  "gssdlc1 $f0, 0x7($10) \r\n"
2401  "gssdrc1 $f0, 0x0($10) \r\n"
2402  "daddu $14, $9, %[stride] \r\n"
2403  "gssdlc1 $f10, 0x7($8) \r\n"
2404  "gssdrc1 $f10, 0x0($8) \r\n"
2405  "daddu $8, $9, $12 \r\n"
2406  "gssdlc1 $f14, 0x7($14) \r\n"
2407  "gssdrc1 $f14, 0x0($14) \r\n"
2408  "daddu $14, $10, $12 \r\n"
2409  "gssdlc1 $f8, 0x7($8) \r\n"
2410  "gssdrc1 $f8, 0x0($8) \r\n"
2411  "gsldlc1 $f16, 0x7($14) \r\n"
2412  "gsldrc1 $f16, 0x0($14) \r\n"
2413  "daddu $8, $10, $12 \r\n"
2414  "punpckhwd $f0, $f6, $f16 \r\n"
2415  "punpcklwd $f6, $f6, $f16 \r\n"
2416  "punpckhwd $f10, $f12, $f2 \r\n"
2417  "punpcklwd $f12, $f12, $f2 \r\n"
2418  "gssdlc1 $f6, 0x7($8) \r\n"
2419  "gssdrc1 $f6, 0x0($8) \r\n"
2420  "daddu $8, $9, $11 \r\n"
2421  "gssdlc1 $f0, 0x7($9) \r\n"
2422  "gssdrc1 $f0, 0x0($9) \r\n"
2423  "daddu $14, $9, $13 \r\n"
2424  "gssdlc1 $f12, 0x7($8) \r\n"
2425  "gssdrc1 $f12, 0x0($8) \r\n"
2426  "daddu $8, $13, $13 \r\n"
2427  "gssdlc1 $f10, 0x7($14) \r\n"
2428  "gssdrc1 $f10, 0x0($14) \r\n"
2429  "dsubu $10, $10, $8 \r\n"
2430  "dsubu $9, $9, $8 \r\n"
2431  "ldc1 $f0, 0x0+%[ptmp] \r\n"
2432  "ldc1 $f2, 0x10+%[ptmp] \r\n"
2433  "ldc1 $f4, 0x20+%[ptmp] \r\n"
2434  "ldc1 $f6, 0x30+%[ptmp] \r\n"
2435  "ldc1 $f8, 0x40+%[ptmp] \r\n"
2436  "ldc1 $f10, 0x50+%[ptmp] \r\n"
2437  "ldc1 $f12, 0x60+%[ptmp] \r\n"
2438  "punpckhbh $f14, $f0, $f2 \r\n"
2439  "punpcklbh $f0, $f0, $f2 \r\n"
2440  "punpckhbh $f2, $f4, $f6 \r\n"
2441  "punpcklbh $f4, $f4, $f6 \r\n"
2442  "punpckhbh $f6, $f8, $f10 \r\n"
2443  "punpcklbh $f8, $f8, $f10 \r\n"
2444  "ldc1 $f16, 0x70+%[ptmp] \r\n"
2445  "punpckhbh $f10, $f12, $f16 \r\n"
2446  "punpcklbh $f12, $f12, $f16 \r\n"
2447  "gssdlc1 $f6, 0x7($10) \r\n"
2448  "gssdrc1 $f6, 0x0($10) \r\n"
2449  "daddu $8, $10, $12 \r\n"
2450  "punpckhhw $f6, $f0, $f4 \r\n"
2451  "punpcklhw $f0, $f0, $f4 \r\n"
2452  "punpckhhw $f4, $f8, $f12 \r\n"
2453  "punpcklhw $f8, $f8, $f12 \r\n"
2454  "punpckhhw $f12, $f14, $f2 \r\n"
2455  "punpcklhw $f14, $f14, $f2 \r\n"
2456  "gssdlc1 $f4, 0x7($8) \r\n"
2457  "gssdrc1 $f4, 0x0($8) \r\n"
2458  "gsldlc1 $f4, 0x7($10) \r\n"
2459  "gsldrc1 $f4, 0x0($10) \r\n"
2460  "punpckhhw $f2, $f4, $f10 \r\n"
2461  "punpcklhw $f4, $f4, $f10 \r\n"
2462  "punpckhwd $f10, $f0, $f8 \r\n"
2463  "punpcklwd $f0, $f0, $f8 \r\n"
2464  "punpckhwd $f8, $f14, $f4 \r\n"
2465  "punpcklwd $f14, $f14, $f4 \r\n"
2466  "daddu $8, $10, %[stride] \r\n"
2467  "gssdlc1 $f0, 0x7($10) \r\n"
2468  "gssdrc1 $f0, 0x0($10) \r\n"
2469  "daddu $14, $9, %[stride] \r\n"
2470  "gssdlc1 $f10, 0x7($8) \r\n"
2471  "gssdrc1 $f10, 0x0($8) \r\n"
2472  "daddu $8, $9, $12 \r\n"
2473  "gssdlc1 $f14, 0x7($14) \r\n"
2474  "gssdrc1 $f14, 0x0($14) \r\n"
2475  "daddu $14, $10, $12 \r\n"
2476  "gssdlc1 $f8, 0x7($8) \r\n"
2477  "gssdrc1 $f8, 0x0($8) \r\n"
2478  "gsldlc1 $f16, 0x7($14) \r\n"
2479  "gsldrc1 $f16, 0x0($14) \r\n"
2480  "daddu $8, $10, $12 \r\n"
2481  "punpckhwd $f0, $f6, $f16 \r\n"
2482  "punpcklwd $f6, $f6, $f16 \r\n"
2483  "punpckhwd $f10, $f12, $f2 \r\n"
2484  "punpcklwd $f12, $f12, $f2 \r\n"
2485  "gssdlc1 $f6, 0x7($8) \r\n"
2486  "gssdrc1 $f6, 0x0($8) \r\n"
2487  "daddu $8, $9, $11 \r\n"
2488  "gssdlc1 $f0, 0x7($9) \r\n"
2489  "gssdrc1 $f0, 0x0($9) \r\n"
2490  "daddu $14, $9, $13 \r\n"
2491  "gssdlc1 $f12, 0x7($8) \r\n"
2492  "gssdrc1 $f12, 0x0($8) \r\n"
2493  "gssdlc1 $f10, 0x7($14) \r\n"
2494  "gssdrc1 $f10, 0x0($14) \r\n"
2495  ::[pix]"r"(pix),[stride]"r"((uint64_t)stride),[ptmp]"m"(ptmp[0]),
2496  [pdat]"m"(pdat[0])
2497  : "$8","$9","$10","$11","$12","$13","$14","$f0","$f2","$f4","$f6",
2498  "$f8","$f10","$f12","$f14","$f16"
2499  );
2500 }
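
A final note on the pdat spill in the function above: $10-$13 hold the computed row addresses, but they are caller-saved MIPS temporaries, so nothing guarantees they survive the call to ff_deblock_v_luma_intra_8_mmi. The first asm block therefore stores them ("sd $10, 0x00+%[pdat]" ...) and the second reloads them ("ld $10, 0x00+%[pdat]" ...) before the transposed result is written back.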