FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
h264dsp_mmi.c
Go to the documentation of this file.
/*
 * Loongson SIMD optimized h264dsp
 *
 * Copyright (c) 2015 Loongson Technology Corporation Limited
 * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
 *                    Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
 *                    Heiher <r@hev.cc>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
25 
27 #include "h264dsp_mips.h"
28 #include "libavutil/mips/asmdefs.h"
29 
/**
 * Add a 4x4 block of 16-bit residuals to 8-bit pixels and clear the residuals.
 *
 * Each of the four destination rows is widened to 16 bits, summed with the
 * matching 8-byte row of src (paddh) and packed back to bytes with unsigned
 * saturation (packushb) before being stored.
 *
 * @param dst    top-left destination pixel; rows are accessed with unaligned
 *               loads/stores
 * @param src    16 coefficients (32 bytes), read with ldc1 and zeroed on return
 * @param stride distance in bytes between consecutive destination rows
 */
void ff_h264_add_pixels4_8_mmi(uint8_t *dst, int16_t *src, int stride)
{
    double ftmp[9];
    int32_t low32;

    __asm__ volatile (
        /* ftmp0 = 0, used as the zero source for unpack/pack. */
        "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
        "ldc1 %[ftmp1], 0x00(%[src]) \n\t"
        "ldc1 %[ftmp2], 0x08(%[src]) \n\t"
        "ldc1 %[ftmp3], 0x10(%[src]) \n\t"
        "ldc1 %[ftmp4], 0x18(%[src]) \n\t"
        /*
         * Only four pixels per row are consumed (punpcklbh uses the low
         * word), so load 32 bits with ulw. The previous uld read eight bytes
         * and therefore touched four bytes beyond each row.
         */
        "ulw %[low32], 0x00(%[dst0]) \n\t"
        "mtc1 %[low32], %[ftmp5] \n\t"
        "ulw %[low32], 0x00(%[dst1]) \n\t"
        "mtc1 %[low32], %[ftmp6] \n\t"
        "ulw %[low32], 0x00(%[dst2]) \n\t"
        "mtc1 %[low32], %[ftmp7] \n\t"
        "ulw %[low32], 0x00(%[dst3]) \n\t"
        "mtc1 %[low32], %[ftmp8] \n\t"
        /* Widen the pixels to 16 bits. */
        "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
        "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
        "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
        "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
        /* residual + pixel */
        "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
        "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
        "paddh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
        "paddh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
        /* Saturate back to unsigned bytes. */
        "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
        "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
        "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
        "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
        /* Unaligned 32-bit stores (left/right halves). */
        "gsswlc1 %[ftmp1], 0x03(%[dst0]) \n\t"
        "gsswrc1 %[ftmp1], 0x00(%[dst0]) \n\t"
        "gsswlc1 %[ftmp2], 0x03(%[dst1]) \n\t"
        "gsswrc1 %[ftmp2], 0x00(%[dst1]) \n\t"
        "gsswlc1 %[ftmp3], 0x03(%[dst2]) \n\t"
        "gsswrc1 %[ftmp3], 0x00(%[dst2]) \n\t"
        "gsswlc1 %[ftmp4], 0x03(%[dst3]) \n\t"
        "gsswrc1 %[ftmp4], 0x00(%[dst3]) \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]),
          [low32]"=&r"(low32)
        : [dst0]"r"(dst), [dst1]"r"(dst+stride),
          [dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride),
          [src]"r"(src)
        : "memory"
    );

    /* The residual block is consumed: leave it zeroed. */
    memset(src, 0, 32);
}
83 
84 void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
85 {
86  double ftmp[12];
87  uint64_t tmp[1];
88  uint64_t low32;
89 
90  __asm__ volatile (
91  "dli %[tmp0], 0x01 \n\t"
92  "ldc1 %[ftmp0], 0x00(%[block]) \n\t"
93  "mtc1 %[tmp0], %[ftmp8] \n\t"
94  "ldc1 %[ftmp1], 0x08(%[block]) \n\t"
95  "dli %[tmp0], 0x06 \n\t"
96  "ldc1 %[ftmp2], 0x10(%[block]) \n\t"
97  "mtc1 %[tmp0], %[ftmp9] \n\t"
98  "psrah %[ftmp4], %[ftmp1], %[ftmp8] \n\t"
99  "ldc1 %[ftmp3], 0x18(%[block]) \n\t"
100  "psrah %[ftmp5], %[ftmp3], %[ftmp8] \n\t"
101  "psubh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
102  "paddh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
103  "paddh %[ftmp10], %[ftmp2], %[ftmp0] \n\t"
104  "psubh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
105  "paddh %[ftmp11], %[ftmp5], %[ftmp10] \n\t"
106  "psubh %[ftmp2], %[ftmp10], %[ftmp5] \n\t"
107  "paddh %[ftmp10], %[ftmp4], %[ftmp0] \n\t"
108  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
109  "punpckhhw %[ftmp1], %[ftmp11], %[ftmp10] \n\t"
110  "punpcklhw %[ftmp5], %[ftmp11], %[ftmp10] \n\t"
111  "punpckhhw %[ftmp4], %[ftmp0], %[ftmp2] \n\t"
112  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
113  "punpckhwd %[ftmp2], %[ftmp5], %[ftmp0] \n\t"
114  "punpcklwd %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
115  "punpcklwd %[ftmp10], %[ftmp1], %[ftmp4] \n\t"
116  "punpckhwd %[ftmp0], %[ftmp1], %[ftmp4] \n\t"
117  "paddh %[ftmp5], %[ftmp5], %[ff_pw_32] \n\t"
118  "psrah %[ftmp4], %[ftmp2], %[ftmp8] \n\t"
119  "psrah %[ftmp3], %[ftmp0], %[ftmp8] \n\t"
120  "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
121  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
122  "paddh %[ftmp1], %[ftmp10], %[ftmp5] \n\t"
123  "psubh %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
124  "paddh %[ftmp10], %[ftmp3], %[ftmp1] \n\t"
125  "psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
126  "paddh %[ftmp11], %[ftmp4], %[ftmp5] \n\t"
127  "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
128  "psubh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
129  "sdc1 %[ftmp7], 0x00(%[block]) \n\t"
130  "sdc1 %[ftmp7], 0x08(%[block]) \n\t"
131  "sdc1 %[ftmp7], 0x10(%[block]) \n\t"
132  "sdc1 %[ftmp7], 0x18(%[block]) \n\t"
133  "uld %[low32], 0x00(%[dst]) \n\t"
134  "mtc1 %[low32], %[ftmp2] \n\t"
135  "psrah %[ftmp3], %[ftmp10], %[ftmp9] \n\t"
136  "gslwxc1 %[ftmp0], 0x00(%[dst], %[stride]) \n\t"
137  "psrah %[ftmp4], %[ftmp11], %[ftmp9] \n\t"
138  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
139  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
140  "paddh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
141  "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
142  "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
143  "packushb %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
144  "gsswlc1 %[ftmp2], 0x03(%[dst]) \n\t"
145  "gsswrc1 %[ftmp2], 0x00(%[dst]) \n\t"
146  "gsswxc1 %[ftmp0], 0x00(%[dst], %[stride]) \n\t"
147  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
148  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
149  "uld %[low32], 0x00(%[dst]) \n\t"
150  "mtc1 %[low32], %[ftmp2] \n\t"
151  "psrah %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
152  "gslwxc1 %[ftmp0], 0x00(%[dst], %[stride]) \n\t"
153  "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
154  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
155  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
156  "paddh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
157  "paddh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
158  "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
159  "gsswlc1 %[ftmp2], 0x03(%[dst]) \n\t"
160  "gsswrc1 %[ftmp2], 0x00(%[dst]) \n\t"
161  "packushb %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
162  "gsswxc1 %[ftmp0], 0x00(%[dst], %[stride]) \n\t"
163  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
164  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
165  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
166  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
167  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
168  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
169  [tmp0]"=&r"(tmp[0]),
170  [low32]"=&r"(low32)
171  : [dst]"r"(dst), [block]"r"(block),
172  [stride]"r"((mips_reg)stride), [ff_pw_32]"f"(ff_pw_32)
173  : "memory"
174  );
175 
176  memset(block, 0, 32);
177 }
178 
179 void ff_h264_idct8_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
180 {
181  double ftmp[16];
182  uint64_t tmp[8];
183  mips_reg addr[1];
184  uint64_t low32;
185 
186  __asm__ volatile (
187  "lhu %[tmp0], 0x00(%[block]) \n\t"
188  PTR_ADDI "$29, $29, -0x20 \n\t"
189  PTR_ADDIU "%[tmp0], %[tmp0], 0x20 \n\t"
190  "ldc1 %[ftmp1], 0x10(%[block]) \n\t"
191  "sh %[tmp0], 0x00(%[block]) \n\t"
192  "ldc1 %[ftmp2], 0x20(%[block]) \n\t"
193  "dli %[tmp0], 0x01 \n\t"
194  "ldc1 %[ftmp3], 0x30(%[block]) \n\t"
195  "mtc1 %[tmp0], %[ftmp8] \n\t"
196  "ldc1 %[ftmp5], 0x50(%[block]) \n\t"
197  "ldc1 %[ftmp6], 0x60(%[block]) \n\t"
198  "ldc1 %[ftmp7], 0x70(%[block]) \n\t"
199  "mov.d %[ftmp0], %[ftmp1] \n\t"
200  "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
201  "psrah %[ftmp4], %[ftmp5], %[ftmp8] \n\t"
202  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
203  "paddh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
204  "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
205  "paddh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
206  "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
207  "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
208  "psubh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
209  "psubh %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
210  "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
211  "paddh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
212  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
213  "psrah %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
214  "psubh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
215  "dli %[tmp0], 0x02 \n\t"
216  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
217  "mtc1 %[tmp0], %[ftmp9] \n\t"
218  "mov.d %[ftmp7], %[ftmp1] \n\t"
219  "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
220  "psrah %[ftmp3], %[ftmp4], %[ftmp9] \n\t"
221  "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
222  "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
223  "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
224  "psrah %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
225  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
226  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
227  "mov.d %[ftmp5], %[ftmp6] \n\t"
228  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
229  "psrah %[ftmp4], %[ftmp2], %[ftmp8] \n\t"
230  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
231  "psubh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
232  "ldc1 %[ftmp2], 0x00(%[block]) \n\t"
233  "ldc1 %[ftmp5], 0x40(%[block]) \n\t"
234  "paddh %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
235  "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
236  "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
237  "psubh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
238  "paddh %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
239  "paddh %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
240  "psubh %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
241  "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
242  "paddh %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
243  "psubh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
244  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
245  "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
246  "psubh %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
247  "paddh %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
248  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
249  "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
250  "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
251  "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
252  "psubh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
253  "paddh %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
254  "sdc1 %[ftmp6], 0x00(%[block]) \n\t"
255  "psubh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
256  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp0] \n\t"
257  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
258  "punpckhhw %[ftmp0], %[ftmp3], %[ftmp1] \n\t"
259  "punpcklhw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
260  "punpckhwd %[ftmp1], %[ftmp7], %[ftmp3] \n\t"
261  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
262  "punpckhwd %[ftmp3], %[ftmp6], %[ftmp0] \n\t"
263  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
264  "ldc1 %[ftmp0], 0x00(%[block]) \n\t"
265  "sdc1 %[ftmp7], 0x00($29) \n\t"
266  "sdc1 %[ftmp1], 0x10($29) \n\t"
267  "dmfc1 %[tmp1], %[ftmp6] \n\t"
268  "dmfc1 %[tmp3], %[ftmp3] \n\t"
269  "punpckhhw %[ftmp3], %[ftmp5], %[ftmp2] \n\t"
270  "punpcklhw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
271  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp0] \n\t"
272  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
273  "punpckhwd %[ftmp0], %[ftmp5], %[ftmp4] \n\t"
274  "punpcklwd %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
275  "punpckhwd %[ftmp4], %[ftmp3], %[ftmp2] \n\t"
276  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
277  "sdc1 %[ftmp5], 0x08($29) \n\t"
278  "sdc1 %[ftmp0], 0x18($29) \n\t"
279  "dmfc1 %[tmp2], %[ftmp3] \n\t"
280  "dmfc1 %[tmp4], %[ftmp4] \n\t"
281  "ldc1 %[ftmp1], 0x18(%[block]) \n\t"
282  "ldc1 %[ftmp6], 0x28(%[block]) \n\t"
283  "ldc1 %[ftmp2], 0x38(%[block]) \n\t"
284  "ldc1 %[ftmp0], 0x58(%[block]) \n\t"
285  "ldc1 %[ftmp3], 0x68(%[block]) \n\t"
286  "ldc1 %[ftmp4], 0x78(%[block]) \n\t"
287  "mov.d %[ftmp7], %[ftmp1] \n\t"
288  "psrah %[ftmp5], %[ftmp0], %[ftmp8] \n\t"
289  "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
290  "paddh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
291  "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
292  "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
293  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
294  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
295  "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
296  "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
297  "psubh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
298  "psrah %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
299  "paddh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
300  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
301  "psrah %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
302  "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
303  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
304  "mov.d %[ftmp4], %[ftmp1] \n\t"
305  "psrah %[ftmp2], %[ftmp5], %[ftmp9] \n\t"
306  "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
307  "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
308  "psrah %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
309  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
310  "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
311  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
312  "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
313  "mov.d %[ftmp0], %[ftmp3] \n\t"
314  "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
315  "psrah %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
316  "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
317  "psubh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
318  "ldc1 %[ftmp6], 0x08(%[block]) \n\t"
319  "ldc1 %[ftmp0], 0x48(%[block]) \n\t"
320  "paddh %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
321  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
322  "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
323  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
324  "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
325  "paddh %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
326  "psubh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
327  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
328  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
329  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
330  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
331  "paddh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
332  "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
333  "paddh %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
334  "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
335  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
336  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
337  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
338  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
339  "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
340  "sdc1 %[ftmp3], 0x08(%[block]) \n\t"
341  "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
342  "punpckhhw %[ftmp3], %[ftmp4], %[ftmp7] \n\t"
343  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
344  "punpckhhw %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
345  "punpcklhw %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
346  "punpckhwd %[ftmp1], %[ftmp4], %[ftmp2] \n\t"
347  "punpcklwd %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
348  "punpckhwd %[ftmp2], %[ftmp3], %[ftmp7] \n\t"
349  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
350  "ldc1 %[ftmp7], 0x08(%[block]) \n\t"
351  "dmfc1 %[tmp5], %[ftmp4] \n\t"
352  "dmfc1 %[tmp7], %[ftmp1] \n\t"
353  "mov.d %[ftmp12], %[ftmp3] \n\t"
354  "mov.d %[ftmp14], %[ftmp2] \n\t"
355  "punpckhhw %[ftmp2], %[ftmp0], %[ftmp6] \n\t"
356  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
357  "punpckhhw %[ftmp6], %[ftmp5], %[ftmp7] \n\t"
358  "punpcklhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
359  "punpckhwd %[ftmp7], %[ftmp0], %[ftmp5] \n\t"
360  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
361  "punpckhwd %[ftmp5], %[ftmp2], %[ftmp6] \n\t"
362  "punpcklwd %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
363  "dmfc1 %[tmp6], %[ftmp0] \n\t"
364  "mov.d %[ftmp11], %[ftmp7] \n\t"
365  "mov.d %[ftmp13], %[ftmp2] \n\t"
366  "mov.d %[ftmp15], %[ftmp5] \n\t"
367  PTR_ADDIU "%[addr0], %[dst], 0x04 \n\t"
368  "dmtc1 %[tmp7], %[ftmp7] \n\t"
369  "dmtc1 %[tmp3], %[ftmp6] \n\t"
370  "ldc1 %[ftmp1], 0x10($29) \n\t"
371  "dmtc1 %[tmp1], %[ftmp3] \n\t"
372  "mov.d %[ftmp4], %[ftmp1] \n\t"
373  "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
374  "psrah %[ftmp0], %[ftmp7], %[ftmp8] \n\t"
375  "paddh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
376  "paddh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
377  "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
378  "paddh %[ftmp0], %[ftmp0], %[ftmp14] \n\t"
379  "paddh %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
380  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
381  "psubh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
382  "psubh %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
383  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
384  "paddh %[ftmp4], %[ftmp4], %[ftmp14] \n\t"
385  "psubh %[ftmp7], %[ftmp7], %[ftmp14] \n\t"
386  "psrah %[ftmp5], %[ftmp14], %[ftmp8] \n\t"
387  "psubh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
388  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
389  "mov.d %[ftmp5], %[ftmp1] \n\t"
390  "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
391  "psrah %[ftmp6], %[ftmp0], %[ftmp9] \n\t"
392  "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
393  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
394  "psrah %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
395  "psrah %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
396  "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
397  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
398  "mov.d %[ftmp7], %[ftmp12] \n\t"
399  "psrah %[ftmp2], %[ftmp12], %[ftmp8] \n\t"
400  "psrah %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
401  "paddh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
402  "psubh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
403  "ldc1 %[ftmp3], 0x00($29) \n\t"
404  "dmtc1 %[tmp5], %[ftmp7] \n\t"
405  "paddh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
406  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
407  "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
408  "psubh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
409  "paddh %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
410  "paddh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
411  "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
412  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
413  "paddh %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
414  "psubh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
415  "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
416  "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
417  "psubh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
418  "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
419  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
420  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
421  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
422  "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
423  "psubh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
424  "paddh %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
425  "sdc1 %[ftmp3], 0x00($29) \n\t"
426  "psubh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
427  "sdc1 %[ftmp0], 0x10($29) \n\t"
428  "dmfc1 %[tmp1], %[ftmp2] \n\t"
429  "xor %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
430  "sdc1 %[ftmp2], 0x00(%[block]) \n\t"
431  "sdc1 %[ftmp2], 0x08(%[block]) \n\t"
432  "sdc1 %[ftmp2], 0x10(%[block]) \n\t"
433  "sdc1 %[ftmp2], 0x18(%[block]) \n\t"
434  "sdc1 %[ftmp2], 0x20(%[block]) \n\t"
435  "sdc1 %[ftmp2], 0x28(%[block]) \n\t"
436  "sdc1 %[ftmp2], 0x30(%[block]) \n\t"
437  "sdc1 %[ftmp2], 0x38(%[block]) \n\t"
438  "sdc1 %[ftmp2], 0x40(%[block]) \n\t"
439  "sdc1 %[ftmp2], 0x48(%[block]) \n\t"
440  "sdc1 %[ftmp2], 0x50(%[block]) \n\t"
441  "sdc1 %[ftmp2], 0x58(%[block]) \n\t"
442  "sdc1 %[ftmp2], 0x60(%[block]) \n\t"
443  "sdc1 %[ftmp2], 0x68(%[block]) \n\t"
444  "sdc1 %[ftmp2], 0x70(%[block]) \n\t"
445  "sdc1 %[ftmp2], 0x78(%[block]) \n\t"
446  "dli %[tmp3], 0x06 \n\t"
447  "uld %[low32], 0x00(%[dst]) \n\t"
448  "mtc1 %[low32], %[ftmp3] \n\t"
449  "mtc1 %[tmp3], %[ftmp10] \n\t"
450  "gslwxc1 %[ftmp0], 0x00(%[dst], %[stride]) \n\t"
451  "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
452  "psrah %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
453  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
454  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
455  "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
456  "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
457  "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
458  "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
459  "gsswlc1 %[ftmp3], 0x03(%[dst]) \n\t"
460  "gsswrc1 %[ftmp3], 0x00(%[dst]) \n\t"
461  "gsswxc1 %[ftmp0], 0x00(%[dst], %[stride]) \n\t"
462  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
463  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
464  "uld %[low32], 0x00(%[dst]) \n\t"
465  "mtc1 %[low32], %[ftmp3] \n\t"
466  "gslwxc1 %[ftmp0], 0x00(%[dst], %[stride]) \n\t"
467  "psrah %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
468  "psrah %[ftmp1], %[ftmp1], %[ftmp10] \n\t"
469  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
470  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
471  "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
472  "paddh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
473  "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
474  "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
475  "gsswlc1 %[ftmp3], 0x03(%[dst]) \n\t"
476  "gsswrc1 %[ftmp3], 0x00(%[dst]) \n\t"
477  "gsswxc1 %[ftmp0], 0x00(%[dst], %[stride]) \n\t"
478  "ldc1 %[ftmp5], 0x00($29) \n\t"
479  "ldc1 %[ftmp4], 0x10($29) \n\t"
480  "dmtc1 %[tmp1], %[ftmp6] \n\t"
481  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
482  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
483  "uld %[low32], 0x00(%[dst]) \n\t"
484  "mtc1 %[low32], %[ftmp3] \n\t"
485  "gslwxc1 %[ftmp0], 0x00(%[dst], %[stride]) \n\t"
486  "psrah %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
487  "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
488  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
489  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
490  "paddh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
491  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
492  "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
493  "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
494  "gsswlc1 %[ftmp3], 0x03(%[dst]) \n\t"
495  "gsswrc1 %[ftmp3], 0x00(%[dst]) \n\t"
496  "gsswxc1 %[ftmp0], 0x00(%[dst], %[stride]) \n\t"
497  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
498  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
499  "uld %[low32], 0x00(%[dst]) \n\t"
500  "mtc1 %[low32], %[ftmp3] \n\t"
501  "gslwxc1 %[ftmp0], 0x00(%[dst], %[stride]) \n\t"
502  "psrah %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
503  "psrah %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
504  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
505  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
506  "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
507  "paddh %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
508  "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
509  "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
510  "gsswlc1 %[ftmp3], 0x03(%[dst]) \n\t"
511  "gsswrc1 %[ftmp3], 0x00(%[dst]) \n\t"
512  "gsswxc1 %[ftmp0], 0x00(%[dst], %[stride]) \n\t"
513  "dmtc1 %[tmp4], %[ftmp1] \n\t"
514  "dmtc1 %[tmp2], %[ftmp6] \n\t"
515  "ldc1 %[ftmp4], 0x18($29) \n\t"
516  "mov.d %[ftmp5], %[ftmp4] \n\t"
517  "psrah %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
518  "psrah %[ftmp7], %[ftmp11], %[ftmp8] \n\t"
519  "paddh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
520  "paddh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
521  "paddh %[ftmp7], %[ftmp7], %[ftmp15] \n\t"
522  "paddh %[ftmp4], %[ftmp4], %[ftmp11] \n\t"
523  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
524  "paddh %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
525  "psubh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
526  "psubh %[ftmp3], %[ftmp11], %[ftmp1] \n\t"
527  "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
528  "paddh %[ftmp5], %[ftmp5], %[ftmp15] \n\t"
529  "psubh %[ftmp3], %[ftmp3], %[ftmp15] \n\t"
530  "psrah %[ftmp2], %[ftmp15], %[ftmp8] \n\t"
531  "psubh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
532  "psubh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
533  "mov.d %[ftmp2], %[ftmp4] \n\t"
534  "psrah %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
535  "psrah %[ftmp1], %[ftmp7], %[ftmp9] \n\t"
536  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
537  "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
538  "psrah %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
539  "psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
540  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
541  "psubh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
542  "mov.d %[ftmp3], %[ftmp13] \n\t"
543  "psrah %[ftmp0], %[ftmp13], %[ftmp8] \n\t"
544  "psrah %[ftmp7], %[ftmp6], %[ftmp8] \n\t"
545  "paddh %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
546  "psubh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
547  "ldc1 %[ftmp6], 0x08($29) \n\t"
548  "dmtc1 %[tmp6], %[ftmp3] \n\t"
549  "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
550  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
551  "paddh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
552  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
553  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
554  "paddh %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
555  "psubh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
556  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
557  "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
558  "psubh %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
559  "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
560  "paddh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
561  "psubh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
562  "paddh %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
563  "paddh %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
564  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
565  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
566  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
567  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
568  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
569  "sdc1 %[ftmp6], 0x08($29) \n\t"
570  "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
571  "sdc1 %[ftmp7], 0x18($29) \n\t"
572  "dmfc1 %[tmp2], %[ftmp0] \n\t"
573  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
574  "uld %[low32], 0x00(%[addr0]) \n\t"
575  "mtc1 %[low32], %[ftmp6] \n\t"
576  "gslwxc1 %[ftmp7], 0x00(%[addr0], %[stride]) \n\t"
577  "psrah %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
578  "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
579  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
580  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
581  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
582  "paddh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
583  "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
584  "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
585  "gsswlc1 %[ftmp6], 0x03(%[addr0]) \n\t"
586  "gsswrc1 %[ftmp6], 0x00(%[addr0]) \n\t"
587  "gsswxc1 %[ftmp7], 0x00(%[addr0], %[stride]) \n\t"
588  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
589  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
590  "uld %[low32], 0x00(%[addr0]) \n\t"
591  "mtc1 %[low32], %[ftmp6] \n\t"
592  "gslwxc1 %[ftmp7], 0x00(%[addr0], %[stride]) \n\t"
593  "psrah %[ftmp1], %[ftmp1], %[ftmp10] \n\t"
594  "psrah %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
595  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
596  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
597  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
598  "paddh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
599  "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
600  "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
601  "gsswlc1 %[ftmp6], 0x03(%[addr0]) \n\t"
602  "gsswrc1 %[ftmp6], 0x00(%[addr0]) \n\t"
603  "gsswxc1 %[ftmp7], 0x00(%[addr0], %[stride]) \n\t"
604  "ldc1 %[ftmp2], 0x08($29) \n\t"
605  "ldc1 %[ftmp5], 0x18($29) \n\t"
606  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
607  "dmtc1 %[tmp2], %[ftmp1] \n\t"
608  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
609  "uld %[low32], 0x00(%[addr0]) \n\t"
610  "mtc1 %[low32], %[ftmp6] \n\t"
611  "gslwxc1 %[ftmp7], 0x00(%[addr0], %[stride]) \n\t"
612  "psrah %[ftmp3], %[ftmp3], %[ftmp10] \n\t"
613  "psrah %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
614  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
615  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
616  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
617  "paddh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
618  "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
619  "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
620  "gsswlc1 %[ftmp6], 0x03(%[addr0]) \n\t"
621  "gsswrc1 %[ftmp6], 0x00(%[addr0]) \n\t"
622  "gsswxc1 %[ftmp7], 0x00(%[addr0], %[stride]) \n\t"
623  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
624  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
625  "uld %[low32], 0x00(%[addr0]) \n\t"
626  "mtc1 %[low32], %[ftmp6] \n\t"
627  "gslwxc1 %[ftmp7], 0x00(%[addr0], %[stride]) \n\t"
628  "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
629  "psrah %[ftmp1], %[ftmp1], %[ftmp10] \n\t"
630  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
631  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
632  "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
633  "paddh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
634  "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
635  "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
636  "gsswlc1 %[ftmp6], 0x03(%[addr0]) \n\t"
637  "gsswrc1 %[ftmp6], 0x00(%[addr0]) \n\t"
638  "gsswxc1 %[ftmp7], 0x00(%[addr0], %[stride]) \n\t"
639  PTR_ADDIU "$29, $29, 0x20 \n\t"
640  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
641  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
642  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
643  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
644  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
645  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
646  [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
647  [ftmp14]"=&f"(ftmp[14]), [ftmp15]"=&f"(ftmp[15]),
648  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
649  [tmp2]"=&r"(tmp[2]), [tmp3]"=&r"(tmp[3]),
650  [tmp4]"=&r"(tmp[4]), [tmp5]"=&r"(tmp[5]),
651  [tmp6]"=&r"(tmp[6]), [tmp7]"=&r"(tmp[7]),
652  [addr0]"=&r"(addr[0]),
653  [low32]"=&r"(low32)
654  : [dst]"r"(dst), [block]"r"(block),
655  [stride]"r"((mips_reg)stride)
656  : "$29","memory"
657  );
658 
659  memset(block, 0, 128);
660 }
661 
663 {
664  int dc = (block[0] + 32) >> 6;
665  double ftmp[6];
666  uint64_t low32;
667 
668  block[0] = 0;
669 
670  __asm__ volatile (
671  "mtc1 %[dc], %[ftmp5] \n\t"
672  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
673  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
674  "uld %[low32], 0x00(%[dst0]) \n\t"
675  "mtc1 %[low32], %[ftmp1] \n\t"
676  "uld %[low32], 0x00(%[dst1]) \n\t"
677  "mtc1 %[low32], %[ftmp2] \n\t"
678  "uld %[low32], 0x00(%[dst2]) \n\t"
679  "mtc1 %[low32], %[ftmp3] \n\t"
680  "uld %[low32], 0x00(%[dst3]) \n\t"
681  "mtc1 %[low32], %[ftmp4] \n\t"
682  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
683  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
684  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
685  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
686  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
687  "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
688  "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
689  "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
690  "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
691  "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
692  "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
693  "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
694  "gsswlc1 %[ftmp1], 0x03(%[dst0]) \n\t"
695  "gsswrc1 %[ftmp1], 0x00(%[dst0]) \n\t"
696  "gsswlc1 %[ftmp2], 0x03(%[dst1]) \n\t"
697  "gsswrc1 %[ftmp2], 0x00(%[dst1]) \n\t"
698  "gsswlc1 %[ftmp3], 0x03(%[dst2]) \n\t"
699  "gsswrc1 %[ftmp3], 0x00(%[dst2]) \n\t"
700  "gsswlc1 %[ftmp4], 0x03(%[dst3]) \n\t"
701  "gsswrc1 %[ftmp4], 0x00(%[dst3]) \n\t"
702  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
703  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
704  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
705  [low32]"=&r"(low32)
706  : [dst0]"r"(dst), [dst1]"r"(dst+stride),
707  [dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride),
708  [dc]"r"(dc)
709  : "memory"
710  );
711 }
712 
714 {
/*
 * NOTE(review): the signature line of this function is missing from the
 * listing.  Going by the call in ff_h264_idct8_add4_8_mmi() it is presumably
 *     void ff_h264_idct8_dc_add_8_mmi(uint8_t *dst, int16_t *block,
 *                                     int stride)
 * -- confirm against h264dsp_mips.h.
 *
 * DC-only add for an 8x8 block: the rounded DC term (block[0] + 32) >> 6 is
 * added to 8 rows of 8 bytes starting at dst (rows are `stride` bytes apart)
 * and the result is packed back to bytes.  block[0] is cleared once the DC
 * value has been read.
 */
715  int dc = (block[0] + 32) >> 6;
716  double ftmp[10];
717 
718  block[0] = 0;
719 
720  __asm__ volatile (
 /* ftmp0 = 0, ftmp5 = dc replicated into the halfword lanes (pshufh with an
  * all-zero selector) */
721  "mtc1 %[dc], %[ftmp5] \n\t"
722  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
723  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
 /* rows 0-3: load 8 bytes each, widen against the zero register, add dc,
  * pack and store back */
724  "ldc1 %[ftmp1], 0x00(%[dst0]) \n\t"
725  "ldc1 %[ftmp2], 0x00(%[dst1]) \n\t"
726  "ldc1 %[ftmp3], 0x00(%[dst2]) \n\t"
727  "ldc1 %[ftmp4], 0x00(%[dst3]) \n\t"
728  "punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
729  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
730  "punpckhbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
731  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
732  "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t"
733  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
734  "punpckhbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
735  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
736  "paddsh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
737  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
738  "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
739  "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
740  "paddsh %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
741  "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
742  "paddsh %[ftmp9], %[ftmp9], %[ftmp5] \n\t"
743  "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
744  "packushb %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
745  "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
746  "packushb %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
747  "packushb %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
748  "sdc1 %[ftmp1], 0x00(%[dst0]) \n\t"
749  "sdc1 %[ftmp2], 0x00(%[dst1]) \n\t"
750  "sdc1 %[ftmp3], 0x00(%[dst2]) \n\t"
751  "sdc1 %[ftmp4], 0x00(%[dst3]) \n\t"
752 
 /* rows 4-7: same sequence, reusing ftmp0 (zero) and ftmp5 (dc) */
753  "ldc1 %[ftmp1], 0x00(%[dst4]) \n\t"
754  "ldc1 %[ftmp2], 0x00(%[dst5]) \n\t"
755  "ldc1 %[ftmp3], 0x00(%[dst6]) \n\t"
756  "ldc1 %[ftmp4], 0x00(%[dst7]) \n\t"
757  "punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
758  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
759  "punpckhbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
760  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
761  "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t"
762  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
763  "punpckhbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
764  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
765  "paddsh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
766  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
767  "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
768  "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
769  "paddsh %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
770  "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
771  "paddsh %[ftmp9], %[ftmp9], %[ftmp5] \n\t"
772  "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
773  "packushb %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
774  "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
775  "packushb %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
776  "packushb %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
777  "sdc1 %[ftmp1], 0x00(%[dst4]) \n\t"
778  "sdc1 %[ftmp2], 0x00(%[dst5]) \n\t"
779  "sdc1 %[ftmp3], 0x00(%[dst6]) \n\t"
780  "sdc1 %[ftmp4], 0x00(%[dst7]) \n\t"
781  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
782  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
783  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
784  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
785  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9])
 /* the eight row pointers are precomputed in C and passed as inputs */
786  : [dst0]"r"(dst), [dst1]"r"(dst+stride),
787  [dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride),
788  [dst4]"r"(dst+4*stride), [dst5]"r"(dst+5*stride),
789  [dst6]"r"(dst+6*stride), [dst7]"r"(dst+7*stride),
790  [dc]"r"(dc)
791  : "memory"
792  );
793 }
794 
795 void ff_h264_idct_add16_8_mmi(uint8_t *dst, const int *block_offset,
796  int16_t *block, int stride, const uint8_t nnzc[15*8])
797 {
798  int i;
799  for(i=0; i<16; i++){
800  int nnz = nnzc[ scan8[i] ];
801  if(nnz){
802  if(nnz==1 && ((int16_t*)block)[i*16])
803  ff_h264_idct_dc_add_8_mmi(dst + block_offset[i], block + i*16,
804  stride);
805  else
806  ff_h264_idct_add_8_mmi(dst + block_offset[i], block + i*16,
807  stride);
808  }
809  }
810 }
811 
812 void ff_h264_idct_add16intra_8_mmi(uint8_t *dst, const int *block_offset,
813  int16_t *block, int stride, const uint8_t nnzc[15*8])
814 {
815  int i;
816  for(i=0; i<16; i++){
817  if(nnzc[ scan8[i] ])
818  ff_h264_idct_add_8_mmi(dst + block_offset[i], block + i*16, stride);
819  else if(((int16_t*)block)[i*16])
820  ff_h264_idct_dc_add_8_mmi(dst + block_offset[i], block + i*16,
821  stride);
822  }
823 }
824 
825 void ff_h264_idct8_add4_8_mmi(uint8_t *dst, const int *block_offset,
826  int16_t *block, int stride, const uint8_t nnzc[15*8])
827 {
828  int i;
829  for(i=0; i<16; i+=4){
830  int nnz = nnzc[ scan8[i] ];
831  if(nnz){
832  if(nnz==1 && ((int16_t*)block)[i*16])
833  ff_h264_idct8_dc_add_8_mmi(dst + block_offset[i],
834  block + i*16, stride);
835  else
836  ff_h264_idct8_add_8_mmi(dst + block_offset[i], block + i*16,
837  stride);
838  }
839  }
840 }
841 
842 void ff_h264_idct_add8_8_mmi(uint8_t **dest, const int *block_offset,
843  int16_t *block, int stride, const uint8_t nnzc[15*8])
844 {
845  int i, j;
846  for(j=1; j<3; j++){
847  for(i=j*16; i<j*16+4; i++){
848  if(nnzc[ scan8[i] ])
849  ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i],
850  block + i*16, stride);
851  else if(((int16_t*)block)[i*16])
852  ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i],
853  block + i*16, stride);
854  }
855  }
856 }
857 
858 void ff_h264_idct_add8_422_8_mmi(uint8_t **dest, const int *block_offset,
859  int16_t *block, int stride, const uint8_t nnzc[15*8])
860 {
861  int i, j;
862 
863  for(j=1; j<3; j++){
864  for(i=j*16; i<j*16+4; i++){
865  if(nnzc[ scan8[i] ])
866  ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i],
867  block + i*16, stride);
868  else if(((int16_t*)block)[i*16])
869  ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i],
870  block + i*16, stride);
871  }
872  }
873 
874  for(j=1; j<3; j++){
875  for(i=j*16+4; i<j*16+8; i++){
876  if(nnzc[ scan8[i+4] ])
877  ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i+4],
878  block + i*16, stride);
879  else if(((int16_t*)block)[i*16])
880  ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i+4],
881  block + i*16, stride);
882  }
883  }
884 }
885 
/**
 * Inverse transform and dequantisation of the 4x4 luma DC coefficients
 * (Loongson MMI inline assembly).
 *
 * Outline of the assembly:
 *  - four 8-byte rows are loaded from input (offsets 0x00, 0x08, 0x10, 0x18);
 *  - an add/subtract butterfly pass is run on the rows, the 4x4 halfword
 *    matrix is rearranged with punpck{l,h}hw / punpck{l,h}wd, and a second
 *    butterfly pass follows;
 *  - each result is multiplied by qmul (pmaddhw), shifted right (psraw),
 *    packed to halfwords (packsswh) and stored with sh at output byte
 *    offsets 0x00, 0x20, 0x40, ... 0x1e0 (16 stores in total).
 *
 * Rounding: 0x80 << 16 is added to qmul before it is moved into ftmp7, and
 * every value is interleaved with ff_pw_1 before pmaddhw -- presumably so
 * the multiply-add produces value * qmul + 0x80 in one step (assumes
 * ff_pw_1 holds halfword ones; it is defined outside this listing).
 *
 * Two code paths, chosen by the sign of qmul - 0x7fff:
 *  - fall-through (not greater than zero): results are shifted right by the
 *    constant 8 held in ftmp8;
 *  - label 1 (greater than zero): qmul is first shifted right with srlv and
 *    the final shift count, held in ftmp6, is reduced to match.
 *
 * The block is assembled under ".set noreorder": the instruction written
 * directly after bgtz and after j occupies the branch delay slot and is
 * executed whether or not the branch is taken.  Do not reorder those lines.
 *
 * @param output destination coefficient buffer (written through sh)
 * @param input  source of the 16 DC coefficients; the register is reused as
 *               scratch after the loads (it is a "+&r" operand)
 * @param qmul   dequantisation multiplier; modified inside the asm ("+&r")
 */
886 void ff_h264_luma_dc_dequant_idct_8_mmi(int16_t *output, int16_t *input,
887  int qmul)
888 {
889  double ftmp[10];
890  uint64_t tmp[2];
891 
892  __asm__ volatile (
893  ".set noreorder \n\t"
 /* ftmp8 = 8 (shift count for the fall-through path),
  * ftmp9 = 0x20 (shift used to reach the upper word of a result register) */
894  "dli %[tmp0], 0x08 \n\t"
895  "ldc1 %[ftmp3], 0x18(%[input]) \n\t"
896  "mtc1 %[tmp0], %[ftmp8] \n\t"
897  "ldc1 %[ftmp2], 0x10(%[input]) \n\t"
898  "dli %[tmp0], 0x20 \n\t"
899  "ldc1 %[ftmp1], 0x08(%[input]) \n\t"
900  "mtc1 %[tmp0], %[ftmp9] \n\t"
901  "ldc1 %[ftmp0], 0x00(%[input]) \n\t"
 /* first butterfly pass; ftmp4 is the temporary */
902  "mov.d %[ftmp4], %[ftmp3] \n\t"
903  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
904  "psubh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
905  "mov.d %[ftmp4], %[ftmp1] \n\t"
906  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
907  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
908  "mov.d %[ftmp4], %[ftmp3] \n\t"
909  "paddh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
910  "psubh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
911  "mov.d %[ftmp4], %[ftmp2] \n\t"
912  "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
913  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
 /* rearrange halfwords between the four registers */
914  "mov.d %[ftmp4], %[ftmp3] \n\t"
915  "punpcklhw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
916  "punpckhhw %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
917  "punpckhhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
918  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
919  "punpckhwd %[ftmp2], %[ftmp3], %[ftmp0] \n\t"
920  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
921  "mov.d %[ftmp0], %[ftmp4] \n\t"
922  "punpcklwd %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
923  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
 /* second butterfly pass; ftmp1 is the temporary */
924  "mov.d %[ftmp1], %[ftmp0] \n\t"
925  "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
926  "psubh %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
927  "mov.d %[ftmp1], %[ftmp2] \n\t"
928  "paddh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
929  "psubh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
930  "mov.d %[ftmp1], %[ftmp0] \n\t"
931  "paddh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
932  "psubh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
933  "mov.d %[ftmp1], %[ftmp4] \n\t"
 /* path selection: branch to 1 when qmul - 0x7fff > 0 */
934  "daddi %[tmp0], %[qmul], -0x7fff \n\t"
935  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
936  "bgtz %[tmp0], 1f \n\t"
 /* delay slot of bgtz: completes the butterfly on both paths */
937  "psubh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
 /* fall-through path: qmul += 0x80 << 16, shift results by ftmp8 */
938  "ori %[tmp0], $0, 0x80 \n\t"
939  "dsll %[tmp0], %[tmp0], 0x10 \n\t"
940  "punpckhhw %[ftmp1], %[ftmp0], %[ff_pw_1] \n\t"
941  "daddu %[qmul], %[qmul], %[tmp0] \n\t"
942  "punpcklhw %[ftmp0], %[ftmp0], %[ff_pw_1] \n\t"
943  "punpckhhw %[ftmp5], %[ftmp2], %[ff_pw_1] \n\t"
944  "punpcklhw %[ftmp2], %[ftmp2], %[ff_pw_1] \n\t"
945  "mtc1 %[qmul], %[ftmp7] \n\t"
946  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
947  "pmaddhw %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
948  "pmaddhw %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
949  "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
950  "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
951  "psraw %[ftmp0], %[ftmp0], %[ftmp8] \n\t"
952  "psraw %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
953  "psraw %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
954  "psraw %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
955  "packsswh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
956  "packsswh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
 /* scatter the four halfwords of each result register: tmp1 carries the
  * low word, input (now scratch) carries the word shifted down by ftmp9 */
957  "dmfc1 %[tmp1], %[ftmp0] \n\t"
958  "dsrl %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
959  "mfc1 %[input], %[ftmp0] \n\t"
960  "sh %[tmp1], 0x00(%[output]) \n\t"
961  "sh %[input], 0x80(%[output]) \n\t"
962  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
963  PTR_SRL "%[input], %[input], 0x10 \n\t"
964  "sh %[tmp1], 0x20(%[output]) \n\t"
965  "sh %[input], 0xa0(%[output]) \n\t"
966  "dmfc1 %[tmp1], %[ftmp2] \n\t"
967  "dsrl %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
968  "mfc1 %[input], %[ftmp2] \n\t"
969  "sh %[tmp1], 0x40(%[output]) \n\t"
970  "sh %[input], 0xc0(%[output]) \n\t"
971  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
972  PTR_SRL "%[input], %[input], 0x10 \n\t"
973  "sh %[tmp1], 0x60(%[output]) \n\t"
974  "sh %[input], 0xe0(%[output]) \n\t"
 /* same multiply / shift / pack / store sequence for ftmp3 and ftmp4 */
975  "punpckhhw %[ftmp1], %[ftmp3], %[ff_pw_1] \n\t"
976  "punpcklhw %[ftmp3], %[ftmp3], %[ff_pw_1] \n\t"
977  "punpckhhw %[ftmp5], %[ftmp4], %[ff_pw_1] \n\t"
978  "punpcklhw %[ftmp4], %[ftmp4], %[ff_pw_1] \n\t"
979  "mtc1 %[qmul], %[ftmp7] \n\t"
980  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
981  "pmaddhw %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
982  "pmaddhw %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
983  "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
984  "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
985  "psraw %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
986  "psraw %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
987  "psraw %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
988  "psraw %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
989  "packsswh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
990  "packsswh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
991  "dmfc1 %[tmp1], %[ftmp3] \n\t"
992  "dsrl %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
993  "mfc1 %[input], %[ftmp3] \n\t"
994  "sh %[tmp1], 0x100(%[output]) \n\t"
995  "sh %[input], 0x180(%[output]) \n\t"
996  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
997  PTR_SRL "%[input], %[input], 0x10 \n\t"
998  "sh %[tmp1], 0x120(%[output]) \n\t"
999  "sh %[input], 0x1a0(%[output]) \n\t"
1000  "dmfc1 %[tmp1], %[ftmp4] \n\t"
1001  "dsrl %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
1002  "mfc1 %[input], %[ftmp4] \n\t"
1003  "sh %[tmp1], 0x140(%[output]) \n\t"
1004  "sh %[input], 0x1c0(%[output]) \n\t"
1005  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
1006  PTR_SRL "%[input], %[input], 0x10 \n\t"
1007  "sh %[tmp1], 0x160(%[output]) \n\t"
1008  "j 2f \n\t"
 /* delay slot of j: last store of the fall-through path */
1009  "sh %[input], 0x1e0(%[output]) \n\t"
 /* large-qmul path: derive a pre-shift for qmul (tmp1) and the matching
  * final shift count (ftmp6), then repeat the multiply/store sequence */
1010  "1: \n\t"
1011  "ori %[tmp0], $0, 0x1f \n\t"
1012  "clz %[tmp1], %[qmul] \n\t"
1013  "ori %[input], $0, 0x07 \n\t"
1014  "dsubu %[tmp1], %[tmp0], %[tmp1] \n\t"
1015  "ori %[tmp0], $0, 0x80 \n\t"
1016  "dsll %[tmp0], %[tmp0], 0x10 \n\t"
1017  "daddu %[qmul], %[qmul], %[tmp0] \n\t"
1018  "dsubu %[tmp0], %[tmp1], %[input] \n\t"
1019  "movn %[tmp1], %[input], %[tmp0] \n\t"
1020  PTR_ADDIU "%[input], %[input], 0x01 \n\t"
1021  "andi %[tmp0], %[tmp1], 0xff \n\t"
1022  "srlv %[qmul], %[qmul], %[tmp0] \n\t"
1023  PTR_SUBU "%[input], %[input], %[tmp1] \n\t"
1024  "mtc1 %[input], %[ftmp6] \n\t"
1025  "punpckhhw %[ftmp1], %[ftmp0], %[ff_pw_1] \n\t"
1026  "punpcklhw %[ftmp0], %[ftmp0], %[ff_pw_1] \n\t"
1027  "punpckhhw %[ftmp5], %[ftmp2], %[ff_pw_1] \n\t"
1028  "punpcklhw %[ftmp2], %[ftmp2], %[ff_pw_1] \n\t"
1029  "mtc1 %[qmul], %[ftmp7] \n\t"
1030  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1031  "pmaddhw %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1032  "pmaddhw %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1033  "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1034  "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1035  "psraw %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
1036  "psraw %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1037  "psraw %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1038  "psraw %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1039  "packsswh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1040  "packsswh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
1041  "dmfc1 %[tmp1], %[ftmp0] \n\t"
1042  "dsrl %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
1043  "sh %[tmp1], 0x00(%[output]) \n\t"
1044  "mfc1 %[input], %[ftmp0] \n\t"
1045  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
1046  "sh %[input], 0x80(%[output]) \n\t"
1047  "sh %[tmp1], 0x20(%[output]) \n\t"
1048  PTR_SRL "%[input], %[input], 0x10 \n\t"
1049  "dmfc1 %[tmp1], %[ftmp2] \n\t"
1050  "sh %[input], 0xa0(%[output]) \n\t"
1051  "dsrl %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
1052  "sh %[tmp1], 0x40(%[output]) \n\t"
1053  "mfc1 %[input], %[ftmp2] \n\t"
1054  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
1055  "sh %[input], 0xc0(%[output]) \n\t"
1056  "sh %[tmp1], 0x60(%[output]) \n\t"
1057  PTR_SRL "%[input], %[input], 0x10 \n\t"
1058  "sh %[input], 0xe0(%[output]) \n\t"
1059  "punpckhhw %[ftmp1], %[ftmp3], %[ff_pw_1] \n\t"
1060  "punpcklhw %[ftmp3], %[ftmp3], %[ff_pw_1] \n\t"
1061  "punpckhhw %[ftmp5], %[ftmp4], %[ff_pw_1] \n\t"
1062  "punpcklhw %[ftmp4], %[ftmp4], %[ff_pw_1] \n\t"
1063  "mtc1 %[qmul], %[ftmp7] \n\t"
1064  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1065  "pmaddhw %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1066  "pmaddhw %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1067  "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1068  "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1069  "psraw %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
1070  "psraw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1071  "psraw %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1072  "psraw %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1073  "packsswh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
1074  "packsswh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1075  "dmfc1 %[tmp1], %[ftmp3] \n\t"
1076  "dsrl %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
1077  "mfc1 %[input], %[ftmp3] \n\t"
1078  "sh %[tmp1], 0x100(%[output]) \n\t"
1079  "sh %[input], 0x180(%[output]) \n\t"
1080  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
1081  PTR_SRL "%[input], %[input], 0x10 \n\t"
1082  "sh %[tmp1], 0x120(%[output]) \n\t"
1083  "sh %[input], 0x1a0(%[output]) \n\t"
1084  "dmfc1 %[tmp1], %[ftmp4] \n\t"
1085  "dsrl %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
1086  "mfc1 %[input], %[ftmp4] \n\t"
1087  "sh %[tmp1], 0x140(%[output]) \n\t"
1088  "sh %[input], 0x1c0(%[output]) \n\t"
1089  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
1090  PTR_SRL "%[input], %[input], 0x10 \n\t"
1091  "sh %[tmp1], 0x160(%[output]) \n\t"
1092  "sh %[input], 0x1e0(%[output]) \n\t"
1093  "2: \n\t"
1094  ".set reorder \n\t"
1095  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1096  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1097  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1098  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1099  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1100  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
 /* input and qmul are overwritten inside the asm, hence read-write operands */
1101  [output]"+&r"(output), [input]"+&r"(input),
1102  [qmul]"+&r"(qmul)
1103  : [ff_pw_1]"f"(ff_pw_1)
1104  : "memory"
1105  );
1106 }
1107 
/**
 * Inverse transform and dequantisation of eight chroma DC coefficients,
 * performed in place.
 *
 * The eight inputs are read from block[0], block[16], ..., block[112]
 * (one every 16 entries).  They are combined with an add/subtract butterfly,
 * scaled by qmul, rounded by adding 128 and shifted right by 8.  No other
 * entries of block are read or written.
 *
 * Output placement: the four "sum" combinations go to block[0], block[32],
 * block[64], block[96]; the four "difference" combinations go to block[16],
 * block[48], block[80], block[112].
 *
 * @param block coefficient buffer, at least 113 entries
 * @param qmul  dequantisation multiplier
 */
void ff_h264_chroma422_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
{
    int temp[8];
    int t[8];

    /* first stage: sum and difference of each horizontal pair */
    temp[0] = block[0]  + block[16];
    temp[1] = block[0]  - block[16];
    temp[2] = block[32] + block[48];
    temp[3] = block[32] - block[48];
    temp[4] = block[64] + block[80];
    temp[5] = block[64] - block[80];
    temp[6] = block[96] + block[112];
    temp[7] = block[96] - block[112];

    /* second stage: combine the four pair-sums (even temp[] entries) ... */
    t[0] = temp[0] + temp[4] + temp[2] + temp[6];
    t[1] = temp[0] - temp[4] + temp[2] - temp[6];
    t[2] = temp[0] - temp[4] - temp[2] + temp[6];
    t[3] = temp[0] + temp[4] - temp[2] - temp[6];
    /* ... and the four pair-differences (odd temp[] entries) */
    t[4] = temp[1] + temp[5] + temp[3] + temp[7];
    t[5] = temp[1] - temp[5] + temp[3] - temp[7];
    t[6] = temp[1] - temp[5] - temp[3] + temp[7];
    t[7] = temp[1] + temp[5] - temp[3] - temp[7];

    /* dequantise with rounding: (x * qmul + 128) >> 8 */
    block[  0] = (t[0] * qmul + 128) >> 8;
    block[ 32] = (t[1] * qmul + 128) >> 8;
    block[ 64] = (t[2] * qmul + 128) >> 8;
    block[ 96] = (t[3] * qmul + 128) >> 8;
    block[ 16] = (t[4] * qmul + 128) >> 8;
    block[ 48] = (t[5] * qmul + 128) >> 8;
    block[ 80] = (t[6] * qmul + 128) >> 8;
    block[112] = (t[7] * qmul + 128) >> 8;
}
1140 
/**
 * Inverse transform and dequantisation of four chroma DC coefficients,
 * performed in place.
 *
 * The four inputs are read from block[0], block[16], block[32] and
 * block[48].  They are combined with a 2x2 add/subtract butterfly, scaled by
 * qmul and shifted right by 7 (no rounding term is added).  No other entries
 * of block are read or written.
 *
 * @param block coefficient buffer, at least 49 entries
 * @param qmul  dequantisation multiplier
 */
void ff_h264_chroma_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
{
    int a, b, c, d;

    /* sums and differences of the two horizontal pairs */
    d = block[0]  - block[16];
    a = block[0]  + block[16];
    b = block[32] - block[48];
    c = block[32] + block[48];

    block[0]  = ((a + c) * qmul) >> 7;
    block[16] = ((d + b) * qmul) >> 7;
    block[32] = ((a - c) * qmul) >> 7;
    block[48] = ((d - b) * qmul) >> 7;
}
1154 
1156  int log2_denom, int weight, int offset)
1157 {
/*
 * NOTE(review): the first line of this signature (return type, function name
 * and the leading parameters) is missing from the listing.  The body uses
 * block, stride and height, so they must be declared there; the name is
 * presumably ff_h264_weight_pixels16_8_mmi -- confirm against
 * h264dsp_mips.h.
 *
 * Weighted prediction of a 16-byte-wide area, performed in place on block.
 * For each of `height` rows (rows are `stride` bytes apart) every byte is
 * widened to a halfword, multiplied by weight (pmullh), offset is added with
 * signed saturation (paddsh), the sum is arithmetically shifted right by
 * log2_denom (psrah) and packed back to a byte (packushb).
 *
 * offset is pre-scaled in C before the loop: it is shifted left by
 * log2_denom and, when log2_denom is non-zero, the rounding term
 * 1 << (log2_denom - 1) is added.
 */
1158  int y;
1159  double ftmp[8];
1160 
1161  offset <<= log2_denom;
1162 
1163  if (log2_denom)
1164  offset += 1 << (log2_denom - 1);
1165 
1166  for (y=0; y<height; y++, block+=stride) {
1167  __asm__ volatile (
1168  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
 /* ftmp1 / ftmp2 = left / right 8 bytes of the row */
1169  "ldc1 %[ftmp1], 0x00(%[block0]) \n\t"
1170  "ldc1 %[ftmp2], 0x00(%[block1]) \n\t"
1171  "mtc1 %[weight], %[ftmp3] \n\t"
1172  "mtc1 %[offset], %[ftmp4] \n\t"
1173  "mtc1 %[log2_denom], %[ftmp5] \n\t"
 /* replicate weight and offset across the halfword lanes */
1174  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1175  "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1176  "punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
1177  "punpckhbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
1178  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1179  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1180  "pmullh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1181  "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1182  "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1183  "pmullh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
1184  "paddsh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1185  "paddsh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
1186  "paddsh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
1187  "paddsh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1188  "psrah %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1189  "psrah %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1190  "psrah %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1191  "psrah %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
1192  "packushb %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1193  "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1194  "sdc1 %[ftmp1], 0x00(%[block0]) \n\t"
1195  "sdc1 %[ftmp2], 0x00(%[block1]) \n\t"
1196  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1197  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1198  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1199  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7])
1200  : [block0]"r"(block), [block1]"r"(block+8),
1201  [weight]"r"(weight), [offset]"r"(offset),
1202  [log2_denom]"r"(log2_denom)
1203  : "memory"
1204  );
1205  }
1206 }
1207 
1209  int height, int log2_denom, int weightd, int weights, int offset)
1210 {
/*
 * NOTE(review): the first line of this signature (return type, function name
 * and the leading parameters) is missing from the listing.  The body uses
 * dst, src and stride, so they must be declared there; the name is
 * presumably ff_h264_biweight_pixels16_8_mmi -- confirm against
 * h264dsp_mips.h.
 *
 * Bi-directional weighted prediction of a 16-byte-wide area.  For each of
 * `height` rows (dst and src both advance by `stride`) the result written to
 * dst is, per byte and in 16-bit lanes:
 *     (src * weights + offset + dst * weightd) >> (log2_denom + 1)
 * packed back to a byte with packushb.  The additions use paddsh (signed
 * saturating).
 *
 * offset is pre-scaled in C to ((offset + 1) | 1) << log2_denom, and the
 * shift count handed to the asm is log2_denom + 1.
 */
1211  int y;
1212  double ftmp[9];
1213 
1214  offset = ((offset + 1) | 1) << log2_denom;
1215 
1216  for (y=0; y<height; y++, dst+=stride, src+=stride) {
1217  __asm__ volatile (
 /* left 8 bytes of the row */
1218  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1219  "ldc1 %[ftmp1], 0x00(%[src0]) \n\t"
1220  "ldc1 %[ftmp2], 0x00(%[dst0]) \n\t"
1221  "mtc1 %[weights], %[ftmp3] \n\t"
1222  "mtc1 %[weightd], %[ftmp4] \n\t"
1223  "mtc1 %[offset], %[ftmp5] \n\t"
1224  "mtc1 %[log2_denom], %[ftmp6] \n\t"
1225  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1226  "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1227  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1228  "punpckhbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
1229  "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
1230  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1231  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1232  "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1233  "pmullh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
1234  "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1235  "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1236  "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1237  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1238  "paddsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1239  "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1240  "psrah %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1241  "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1242  "packushb %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1243  "sdc1 %[ftmp1], 0x00(%[dst0]) \n\t"
 /* right 8 bytes; the replicated weights/offset/shift registers are reused */
1244  "ldc1 %[ftmp1], 0x00(%[src1]) \n\t"
1245  "ldc1 %[ftmp2], 0x00(%[dst1]) \n\t"
1246  "punpckhbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
1247  "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
1248  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1249  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1250  "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1251  "pmullh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
1252  "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1253  "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1254  "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1255  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1256  "paddsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1257  "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1258  "psrah %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1259  "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1260  "packushb %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1261  "sdc1 %[ftmp1], 0x00(%[dst1]) \n\t"
1262  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1263  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1264  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1265  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1266  [ftmp8]"=&f"(ftmp[8])
1267  : [dst0]"r"(dst), [dst1]"r"(dst+8),
1268  [src0]"r"(src), [src1]"r"(src+8),
1269  [weights]"r"(weights), [weightd]"r"(weightd),
1270  [offset]"r"(offset), [log2_denom]"r"(log2_denom+1)
1271  : "memory"
1272  );
1273  }
1274 }
1275 
1277  int log2_denom, int weight, int offset)
1278 {
/*
 * NOTE(review): the first line of this signature (return type, function name
 * and the leading parameters) is missing from the listing.  The body uses
 * block, stride and height, so they must be declared there; the name is
 * presumably ff_h264_weight_pixels8_8_mmi -- confirm against
 * h264dsp_mips.h.
 *
 * Weighted prediction of an 8-byte-wide area, performed in place on block.
 * For each of `height` rows (rows are `stride` bytes apart) every byte is
 * widened to a halfword, multiplied by weight (pmullh), offset is added with
 * signed saturation (paddsh), the sum is arithmetically shifted right by
 * log2_denom (psrah) and packed back to a byte (packushb).
 *
 * offset is pre-scaled in C before the loop: it is shifted left by
 * log2_denom and, when log2_denom is non-zero, the rounding term
 * 1 << (log2_denom - 1) is added.
 */
1279  int y;
1280  double ftmp[6];
1281 
1282  offset <<= log2_denom;
1283 
1284  if (log2_denom)
1285  offset += 1 << (log2_denom - 1);
1286 
1287  for (y=0; y<height; y++, block+=stride) {
1288  __asm__ volatile (
1289  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1290  "ldc1 %[ftmp1], 0x00(%[block]) \n\t"
1291  "mtc1 %[weight], %[ftmp2] \n\t"
1292  "mtc1 %[offset], %[ftmp3] \n\t"
1293  "mtc1 %[log2_denom], %[ftmp5] \n\t"
 /* replicate weight and offset across the halfword lanes */
1294  "pshufh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1295  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
 /* ftmp4 = upper 4 bytes widened, ftmp1 = lower 4 bytes widened */
1296  "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t"
1297  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1298  "pmullh %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
1299  "pmullh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1300  "paddsh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
1301  "paddsh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1302  "psrah %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1303  "psrah %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1304  "packushb %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
1305  "sdc1 %[ftmp1], 0x00(%[block]) \n\t"
1306  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1307  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1308  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5])
1309  : [block]"r"(block), [weight]"r"(weight),
1310  [offset]"r"(offset), [log2_denom]"r"(log2_denom)
1311  : "memory"
1312  );
1313  }
1314 }
1315 
1317  int height, int log2_denom, int weightd, int weights, int offset)
1318 {
/*
 * NOTE(review): the first line of this signature (return type, function name
 * and the leading parameters) is missing from the listing.  The body uses
 * dst, src and stride, so they must be declared there; the name is
 * presumably ff_h264_biweight_pixels8_8_mmi -- confirm against
 * h264dsp_mips.h.
 *
 * Bi-directional weighted prediction of an 8-byte-wide area.  For each of
 * `height` rows (dst and src both advance by `stride`) the result written to
 * dst is, per byte and in 16-bit lanes:
 *     (src * weights + offset + dst * weightd) >> (log2_denom + 1)
 * packed back to a byte with packushb.  The additions use paddsh (signed
 * saturating).
 *
 * offset is pre-scaled in C to ((offset + 1) | 1) << log2_denom, and the
 * shift count handed to the asm is log2_denom + 1.
 */
1319  int y;
1320  double ftmp[9];
1321 
1322  offset = ((offset + 1) | 1) << log2_denom;
1323 
1324  for (y=0; y<height; y++, dst+=stride, src+=stride) {
1325  __asm__ volatile (
1326  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1327  "ldc1 %[ftmp1], 0x00(%[src]) \n\t"
1328  "ldc1 %[ftmp2], 0x00(%[dst]) \n\t"
1329  "mtc1 %[weights], %[ftmp3] \n\t"
1330  "mtc1 %[weightd], %[ftmp4] \n\t"
1331  "mtc1 %[offset], %[ftmp5] \n\t"
1332  "mtc1 %[log2_denom], %[ftmp6] \n\t"
 /* replicate both weights and the offset across the halfword lanes */
1333  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1334  "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1335  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
 /* ftmp7/ftmp1 = src high/low halves, ftmp8/ftmp2 = dst high/low halves */
1336  "punpckhbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
1337  "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
1338  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1339  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1340  "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1341  "pmullh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
1342  "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1343  "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1344  "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1345  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1346  "paddsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1347  "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1348  "psrah %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1349  "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1350  "packushb %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1351  "sdc1 %[ftmp1], 0x00(%[dst]) \n\t"
1352  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1353  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1354  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1355  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1356  [ftmp8]"=&f"(ftmp[8])
1357  : [dst]"r"(dst), [src]"r"(src),
1358  [weights]"r"(weights), [weightd]"r"(weightd),
1359  [offset]"r"(offset), [log2_denom]"r"(log2_denom+1)
1360  : "memory"
1361  );
1362  }
1363 }
1364 
1366  int log2_denom, int weight, int offset)
1367 {
/*
 * NOTE(review): the first line of this signature (return type, function name
 * and the leading parameters) is missing from the listing.  The body uses
 * block, stride and height, so they must be declared there; the name is
 * presumably ff_h264_weight_pixels4_8_mmi -- confirm against
 * h264dsp_mips.h.
 *
 * Weighted prediction of a 4-byte-wide area, performed in place on block.
 * For each of `height` rows the low four bytes are widened to halfwords,
 * multiplied by weight (pmullh), offset is added with signed saturation
 * (paddsh), the sum is arithmetically shifted right by log2_denom (psrah),
 * packed back to bytes (packushb) and four bytes are stored with the
 * gsswlc1/gsswrc1 pair.
 *
 * offset is pre-scaled in C before the loop: it is shifted left by
 * log2_denom and, when log2_denom is non-zero, the rounding term
 * 1 << (log2_denom - 1) is added.
 *
 * NOTE(review): the row is fetched with uld, a 64-bit unaligned load, while
 * only the low word is moved into the FPU register by mtc1.  That reads four
 * bytes beyond the 4-pixel row -- confirm this over-read is acceptable for
 * all callers.
 */
1368  int y;
1369  double ftmp[5];
1370  uint64_t low32;
1371 
1372  offset <<= log2_denom;
1373 
1374  if (log2_denom)
1375  offset += 1 << (log2_denom - 1);
1376 
1377  for (y=0; y<height; y++, block+=stride) {
1378  __asm__ volatile (
1379  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1380  "uld %[low32], 0x00(%[block]) \n\t"
1381  "mtc1 %[low32], %[ftmp1] \n\t"
1382  "mtc1 %[weight], %[ftmp2] \n\t"
1383  "mtc1 %[offset], %[ftmp3] \n\t"
1384  "mtc1 %[log2_denom], %[ftmp4] \n\t"
1385  "pshufh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1386  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1387  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1388  "pmullh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1389  "paddsh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1390  "psrah %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
1391  "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
 /* unaligned 32-bit store of the four result bytes */
1392  "gsswlc1 %[ftmp1], 0x03(%[block]) \n\t"
1393  "gsswrc1 %[ftmp1], 0x00(%[block]) \n\t"
1394  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1395  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1396  [ftmp4]"=&f"(ftmp[4]),
1397  [low32]"=&r"(low32)
1398  : [block]"r"(block), [weight]"r"(weight),
1399  [offset]"r"(offset), [log2_denom]"r"(log2_denom)
1400  : "memory"
1401  );
1402  }
1403 }
1404 
1406  int height, int log2_denom, int weightd, int weights, int offset)
1407 {
/*
 * NOTE(review): the first line of this signature (return type, function name
 * and the leading parameters) is missing from the listing.  The body uses
 * dst, src and stride, so they must be declared there; the name is
 * presumably ff_h264_biweight_pixels4_8_mmi -- confirm against
 * h264dsp_mips.h.
 *
 * Bi-directional weighted prediction of a 4-byte-wide area.  For each of
 * `height` rows (dst and src both advance by `stride`) the result written to
 * dst is, per byte and in 16-bit lanes:
 *     (src * weights + offset + dst * weightd) >> (log2_denom + 1)
 * packed back to bytes with packushb and stored as four bytes with the
 * gsswlc1/gsswrc1 pair.
 *
 * offset is pre-scaled in C to ((offset + 1) | 1) << log2_denom, and the
 * shift count handed to the asm is log2_denom + 1.  Note that the asm
 * operand named "weight" is bound to the C variable weights.
 *
 * NOTE(review): src and dst are fetched with uld, a 64-bit unaligned load,
 * while only the low word is moved into the FPU register by mtc1.  That
 * reads four bytes beyond each 4-pixel row -- confirm this over-read is
 * acceptable for all callers.
 */
1408  int y;
1409  double ftmp[7];
1410  uint64_t low32;
1411 
1412  offset = ((offset + 1) | 1) << log2_denom;
1413 
1414  for (y=0; y<height; y++, dst+=stride, src+=stride) {
1415  __asm__ volatile (
1416  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1417  "uld %[low32], 0x00(%[src]) \n\t"
1418  "mtc1 %[low32], %[ftmp1] \n\t"
1419  "uld %[low32], 0x00(%[dst]) \n\t"
1420  "mtc1 %[low32], %[ftmp2] \n\t"
1421  "mtc1 %[weight], %[ftmp3] \n\t"
1422  "mtc1 %[weightd], %[ftmp4] \n\t"
1423  "mtc1 %[offset], %[ftmp5] \n\t"
1424  "mtc1 %[log2_denom], %[ftmp6] \n\t"
1425  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1426  "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1427  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1428  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1429  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1430  "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1431  "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1432  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1433  "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1434  "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1435  "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
 /* unaligned 32-bit store of the four result bytes */
1436  "gsswlc1 %[ftmp1], 0x03(%[dst]) \n\t"
1437  "gsswrc1 %[ftmp1], 0x00(%[dst]) \n\t"
1438  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1439  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1440  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1441  [ftmp6]"=&f"(ftmp[6]),
1442  [low32]"=&r"(low32)
1443  : [dst]"r"(dst), [src]"r"(src),
1444  [weight]"r"(weights), [weightd]"r"(weightd),
1445  [offset]"r"(offset), [log2_denom]"r"(log2_denom+1)
1446  : "memory"
1447  );
1448  }
1449 }
1450 
/**
 * Luma deblocking filter for an 8-byte-wide segment of the edge between the
 * row at pix - stride and the row at pix (Loongson MMI inline assembly).
 *
 * Memory access, all 8 bytes wide:
 *  - reads rows pix - 3*stride, pix - 2*stride, pix - stride, pix,
 *    pix + stride and pix + 2*stride;
 *  - writes rows pix - 2*stride, pix - stride, pix and pix + stride.
 * Inside the asm addr0 = 2*stride and addr1 = pix - 3*stride.
 *
 * alpha and beta are decremented by one and replicated to every byte lane;
 * byte lanes whose absolute differences exceed those thresholds are masked
 * out of the filter.  tc0 is loaded and expanded with two punpcklbh steps so
 * that tc0[0] covers the first four lanes and tc0[1] the last four; lanes
 * whose expanded tc0 is not greater than -1 (pcmpgtb against all-ones) are
 * masked out as well.
 *
 * NOTE(review): alpha and beta are declared as input-only operands ("r") but
 * are modified by the two addi instructions.  GCC assumes input operands are
 * left unchanged -- confirm, and consider read-write operands on locals.
 *
 * NOTE(review): tc0 is fetched with uld, a 64-bit unaligned load, although
 * only the low word is moved to the FPU register; confirm that reading
 * 8 bytes at tc0 is safe for all callers.
 *
 * @param pix    first byte of the row just below the edge
 * @param stride distance in bytes between rows
 * @param alpha  edge threshold (used as alpha - 1)
 * @param beta   inner threshold (used as beta - 1)
 * @param tc0    per-4-pixel clipping values; negative entries disable
 *               filtering for their lanes
 */
1451 void ff_deblock_v8_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
1452  int8_t *tc0)
1453 {
1454  double ftmp[12];
1455  mips_reg addr[2];
1456  uint64_t low32;
1457 
1458  __asm__ volatile (
 /* addr0 = 2*stride, addr1 = pix - 3*stride; thresholds minus one */
1459  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
1460  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1461  PTR_ADDU "%[addr1], %[stride], %[addr0] \n\t"
1462  "addi %[alpha], %[alpha], -0x01 \n\t"
1463  PTR_SUBU "%[addr1], $0, %[addr1] \n\t"
1464  "addi %[beta], %[beta], -0x01 \n\t"
1465  PTR_ADDU "%[addr1], %[addr1], %[pix] \n\t"
 /* ftmp1 = row pix-2*stride, ftmp2 = row pix-stride,
  * ftmp3 = row pix,          ftmp4 = row pix+stride */
1466  "ldc1 %[ftmp3], 0x00(%[pix]) \n\t"
1467  "gsldxc1 %[ftmp1], 0x00(%[addr1], %[stride]) \n\t"
1468  "gsldxc1 %[ftmp2], 0x00(%[addr1], %[addr0]) \n\t"
1469  "gsldxc1 %[ftmp4], 0x00(%[pix], %[stride]) \n\t"
 /* ftmp5 / ftmp6 = alpha-1 / beta-1 replicated to all byte lanes */
1470  "mtc1 %[alpha], %[ftmp5] \n\t"
1471  "mtc1 %[beta], %[ftmp6] \n\t"
1472  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1473  "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1474  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1475  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
 /* build the per-lane filter mask in ftmp8 from the three absolute
  * differences across and beside the edge */
1476  "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t"
1477  "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1478  "or %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1479  "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
1480  "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1481  "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1482  "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1483  "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1484  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1485  "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1486  "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t"
1487  "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1488  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1489  "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1490  "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
 /* ftmp4 = all ones; ftmp9 = tc0[0] x4, tc0[1] x4; ftmp10 = mask of lanes
  * that pass the thresholds and have tc0 > -1 */
1491  "pcmpeqb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
1492  "uld %[low32], 0x00(%[tc0]) \n\t"
1493  "mtc1 %[low32], %[ftmp5] \n\t"
1494  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1495  "punpcklbh %[ftmp9], %[ftmp5], %[ftmp5] \n\t"
1496  "pcmpgtb %[ftmp5], %[ftmp9], %[ftmp4] \n\t"
 /* upper side: uses row pix-3*stride, updates row pix-2*stride */
1497  "ldc1 %[ftmp4], 0x00(%[addr1]) \n\t"
1498  "and %[ftmp10], %[ftmp5], %[ftmp8] \n\t"
1499  "psubusb %[ftmp8], %[ftmp4], %[ftmp2] \n\t"
1500  "psubusb %[ftmp7], %[ftmp2], %[ftmp4] \n\t"
1501  "psubusb %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1502  "psubusb %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1503  "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1504  "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1505  "and %[ftmp5], %[ftmp10], %[ftmp9] \n\t"
1506  "psubb %[ftmp8], %[ftmp5], %[ftmp7] \n\t"
1507  "and %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1508  "pavgb %[ftmp5], %[ftmp2], %[ftmp3] \n\t"
1509  "ldc1 %[ftmp11], 0x00(%[addr1]) \n\t"
1510  "pavgb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1511  "xor %[ftmp5], %[ftmp5], %[ftmp11] \n\t"
1512  "and %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t"
1513  "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1514  "psubusb %[ftmp5], %[ftmp1], %[ftmp7] \n\t"
1515  "paddusb %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
1516  "pmaxub %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1517  "pminub %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1518  "gssdxc1 %[ftmp4], 0x00(%[addr1], %[stride]) \n\t"
 /* lower side: uses row pix+2*stride, updates row pix+stride */
1519  "gsldxc1 %[ftmp5], 0x00(%[pix], %[addr0]) \n\t"
1520  "psubusb %[ftmp4], %[ftmp5], %[ftmp3] \n\t"
1521  "psubusb %[ftmp7], %[ftmp3], %[ftmp5] \n\t"
1522  "psubusb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1523  "psubusb %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1524  "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
1525  "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1526  "psubb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1527  "and %[ftmp6], %[ftmp9], %[ftmp7] \n\t"
1528  "gsldxc1 %[ftmp4], 0x00(%[pix], %[stride]) \n\t"
1529  "pavgb %[ftmp7], %[ftmp2], %[ftmp3] \n\t"
1530  "gsldxc1 %[ftmp11], 0x00(%[pix], %[addr0]) \n\t"
1531  "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1532  "xor %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
1533  "and %[ftmp7], %[ftmp7], %[ff_pb_1] \n\t"
1534  "psubusb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1535  "psubusb %[ftmp7], %[ftmp4], %[ftmp6] \n\t"
1536  "paddusb %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1537  "pmaxub %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1538  "pminub %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1539  "gssdxc1 %[ftmp5], 0x00(%[pix], %[stride]) \n\t"
 /* the two rows adjacent to the edge: compute a delta, clamp it with the
  * limit in ftmp8, then apply it to rows pix-stride and pix */
1540  "xor %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1541  "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1542  "and %[ftmp6], %[ftmp6], %[ff_pb_1] \n\t"
1543  "xor %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1544  "xor %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
1545  "pavgb %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
1546  "pavgb %[ftmp4], %[ftmp4], %[ff_pb_3] \n\t"
1547  "pavgb %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
1548  "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1549  "paddusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1550  "psubusb %[ftmp7], %[ff_pb_A1], %[ftmp4] \n\t"
1551  "psubusb %[ftmp4], %[ftmp4], %[ff_pb_A1] \n\t"
1552  "pminub %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1553  "pminub %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
1554  "psubusb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1555  "psubusb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1556  "paddusb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1557  "paddusb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1558  "gssdxc1 %[ftmp2], 0x00(%[addr1], %[addr0]) \n\t"
1559  "sdc1 %[ftmp3], 0x00(%[pix]) \n\t"
1560  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1561  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1562  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1563  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1564  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1565  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
1566  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
1567  [low32]"=&r"(low32)
1568  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
1569  [alpha]"r"((mips_reg)alpha), [beta]"r"((mips_reg)beta),
1570  [tc0]"r"(tc0), [ff_pb_1]"f"(ff_pb_1),
1571  [ff_pb_3]"f"(ff_pb_3), [ff_pb_A1]"f"(ff_pb_A1)
1572  : "memory"
1573  );
1574 }
1575 
1577  int beta)
1578 {
/*
 * Loongson MMI inline-assembly filter working on 8-byte rows around pix.
 *
 * NOTE(review): the first line of this function's signature is absent from
 * this listing. Other code in this file calls it as
 * deblock_v8_luma_intra_8_mmi(pix, stride, alpha, beta).
 *
 * Rows read:    pix - 4*stride ... pix + 3*stride (8 bytes each).
 * Rows written: pix - 3*stride ... pix + 2*stride.
 *
 * alpha and beta are each decremented first; if either result is negative
 * the asm branches to label 1 and nothing is stored.
 */
/*
 * Scratch area used by the asm at byte offsets 0x00, 0x10, 0x20, 0x30 and
 * 0x40 to spill masks and partial sums.
 * NOTE(review): the array is declared const and handed to the asm as an "m"
 * input, yet the asm stores to it with sdc1 - confirm this is intended.
 */
1579  DECLARE_ALIGNED(8, const uint64_t, stack[0x0a]);
1580  double ftmp[16];
1581  uint64_t tmp[1];
1582  mips_reg addr[3];
1583 
1584 __asm__ volatile (
1585 "ori %[tmp0], $0, 0x01 \n\t"
1586 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1587 "mtc1 %[tmp0], %[ftmp9] \n\t"
1588 PTR_SLL "%[addr0], %[stride], 0x02 \n\t"
1589 PTR_ADDU "%[addr2], %[stride], %[stride] \n\t"
1590 PTR_ADDIU "%[alpha], %[alpha], -0x01 \n\t"
1591 PTR_SLL "%[ftmp11], %[ftmp9], %[ftmp9] \n\t"
1592 "bltz %[alpha], 1f \n\t"
1593 PTR_ADDU "%[addr1], %[addr2], %[stride] \n\t"
1594 PTR_ADDIU "%[beta], %[beta], -0x01 \n\t"
1595 "bltz %[beta], 1f \n\t"
1596 PTR_SUBU "%[addr0], $0, %[addr0] \n\t"
1597 PTR_ADDU "%[addr0], %[addr0], %[pix] \n\t"
/* From here on: addr0 = pix - 4*stride, addr1 = 3*stride, addr2 = 2*stride. */
/* ftmp3 = row pix, ftmp1 = row pix - 2*stride, ftmp2 = row pix - stride,
 * ftmp4 = row pix + stride. */
1598 "ldc1 %[ftmp3], 0x00(%[pix]) \n\t"
1599 "gsldxc1 %[ftmp1], 0x00(%[addr0], %[addr2]) \n\t"
1600 "gsldxc1 %[ftmp2], 0x00(%[addr0], %[addr1]) \n\t"
1601 "gsldxc1 %[ftmp4], 0x00(%[pix], %[stride]) \n\t"
/* ftmp5 / ftmp6 are built from (alpha - 1) / (beta - 1) and used as the
 * saturating-subtract thresholds in the comparisons that follow. */
1602 "mtc1 %[alpha], %[ftmp5] \n\t"
1603 "mtc1 %[beta], %[ftmp6] \n\t"
1604 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1605 "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1606 "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1607 "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t"
1608 "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1609 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1610 "or %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1611 "sdc1 %[ftmp5], 0x10+%[stack] \n\t"
1612 "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1613 "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
1614 "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1615 "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1616 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1617 "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1618 "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1619 "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t"
1620 "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1621 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1622 "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1623 "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1624 "ldc1 %[ftmp5], 0x10+%[stack] \n\t"
/* ftmp8 becomes the per-byte mask of lanes where all three thresholded
 * differences above are zero; it is saved at stack + 0x20. */
1625 "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1626 "ldc1 %[ftmp10], %[ff_pb_1] \n\t"
1627 "sdc1 %[ftmp8], 0x20+%[stack] \n\t"
1628 "pavgb %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1629 "psubusb %[ftmp8], %[ftmp3], %[ftmp2] \n\t"
1630 "pavgb %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
1631 "psubusb %[ftmp7], %[ftmp2], %[ftmp3] \n\t"
1632 "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1633 "psubusb %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1634 "ldc1 %[ftmp15], 0x20+%[stack] \n\t"
1635 "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1636 "and %[ftmp7], %[ftmp7], %[ftmp15] \n\t"
/* ftmp15 = row pix - 3*stride; the mask derived from it goes to stack + 0x30. */
1637 "gsldxc1 %[ftmp15], 0x00(%[addr0], %[stride]) \n\t"
1638 "psubusb %[ftmp8], %[ftmp15], %[ftmp2] \n\t"
1639 "psubusb %[ftmp5], %[ftmp2], %[ftmp15] \n\t"
1640 "psubusb %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1641 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1642 "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
1643 "and %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
/* ftmp14 = row pix + 2*stride; the mask derived from it goes to stack + 0x40. */
1644 "gsldxc1 %[ftmp14], 0x00(%[pix], %[addr2]) \n\t"
1645 "sdc1 %[ftmp5], 0x30+%[stack] \n\t"
1646 "psubusb %[ftmp8], %[ftmp14], %[ftmp3] \n\t"
1647 "psubusb %[ftmp5], %[ftmp3], %[ftmp14] \n\t"
1648 "psubusb %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1649 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1650 "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
1651 "and %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1652 "sdc1 %[ftmp5], 0x40+%[stack] \n\t"
/* First half: compute and store the three rows above pix
 * (pix - stride, pix - 2*stride, pix - 3*stride). */
1653 "pavgb %[ftmp5], %[ftmp15], %[ftmp1] \n\t"
1654 "pavgb %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1655 "pavgb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1656 "sdc1 %[ftmp6], 0x10+%[stack] \n\t"
1657 "paddb %[ftmp7], %[ftmp15], %[ftmp1] \n\t"
1658 "paddb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1659 "paddb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1660 "mov.d %[ftmp8], %[ftmp7] \n\t"
1661 "sdc1 %[ftmp7], 0x00+%[stack] \n\t"
1662 "psrlh %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
1663 "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1664 "xor %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1665 "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1666 "psubb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1667 "pavgb %[ftmp6], %[ftmp15], %[ftmp4] \n\t"
1668 "psubb %[ftmp7], %[ftmp15], %[ftmp4] \n\t"
1669 "paddb %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
1670 "psubb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1671 "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1672 "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1673 "ldc1 %[ftmp13], 0x10+%[stack] \n\t"
1674 "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1675 "psrlh %[ftmp8], %[ftmp8], %[ftmp11] \n\t"
1676 "pavgb %[ftmp6], %[ftmp6], %[ftmp13] \n\t"
1677 "pavgb %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
1678 "xor %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1679 "and %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1680 "psubb %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1681 "xor %[ftmp8], %[ftmp2], %[ftmp4] \n\t"
1682 "pavgb %[ftmp7], %[ftmp2], %[ftmp4] \n\t"
1683 "and %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1684 "psubb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1685 "ldc1 %[ftmp13], 0x30+%[stack] \n\t"
1686 "pavgb %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
1687 "ldc1 %[ftmp12], 0x20+%[stack] \n\t"
1688 "xor %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1689 "xor %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
1690 "and %[ftmp6], %[ftmp6], %[ftmp13] \n\t"
1691 "and %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
1692 "xor %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1693 "xor %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
/* Store row pix - stride, then load row pix - 4*stride. */
1694 "gssdxc1 %[ftmp6], 0x00(%[addr0], %[addr1]) \n\t"
1695 "ldc1 %[ftmp6], 0x00(%[addr0]) \n\t"
1696 "paddb %[ftmp7], %[ftmp15], %[ftmp6] \n\t"
1697 "pavgb %[ftmp6], %[ftmp6], %[ftmp15] \n\t"
1698 "ldc1 %[ftmp12], 0x00+%[stack] \n\t"
1699 "pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1700 "paddb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1701 "paddb %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
1702 "psrlh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
1703 "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1704 "xor %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1705 "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1706 "ldc1 %[ftmp12], 0x30+%[stack] \n\t"
1707 "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1708 "xor %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
1709 "xor %[ftmp6], %[ftmp6], %[ftmp15] \n\t"
1710 "and %[ftmp5], %[ftmp5], %[ftmp12] \n\t"
1711 "and %[ftmp6], %[ftmp6], %[ftmp12] \n\t"
1712 "xor %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
1713 "xor %[ftmp6], %[ftmp6], %[ftmp15] \n\t"
/* Store rows pix - 2*stride and pix - 3*stride. */
1714 "gssdxc1 %[ftmp5], 0x00(%[addr0], %[addr2]) \n\t"
1715 "gssdxc1 %[ftmp6], 0x00(%[addr0], %[stride]) \n\t"
/* Second half: the same sequence mirrored for rows pix, pix + stride and
 * pix + 2*stride, using the mask saved at stack + 0x40. */
1716 "pavgb %[ftmp5], %[ftmp14], %[ftmp4] \n\t"
1717 "pavgb %[ftmp6], %[ftmp3], %[ftmp2] \n\t"
1718 "pavgb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1719 "sdc1 %[ftmp6], 0x10+%[stack] \n\t"
1720 "paddb %[ftmp7], %[ftmp14], %[ftmp4] \n\t"
1721 "paddb %[ftmp8], %[ftmp3], %[ftmp2] \n\t"
1722 "paddb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1723 "mov.d %[ftmp8], %[ftmp7] \n\t"
1724 "sdc1 %[ftmp7], 0x00+%[stack] \n\t"
1725 "psrlh %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
1726 "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1727 "xor %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1728 "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1729 "psubb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1730 "pavgb %[ftmp6], %[ftmp14], %[ftmp1] \n\t"
1731 "paddb %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
1732 "psubb %[ftmp7], %[ftmp14], %[ftmp1] \n\t"
1733 "psubb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1734 "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1735 "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1736 "ldc1 %[ftmp12], 0x10+%[stack] \n\t"
1737 "pavgb %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1738 "pavgb %[ftmp6], %[ftmp6], %[ftmp12] \n\t"
1739 "psrlh %[ftmp8], %[ftmp8], %[ftmp11] \n\t"
1740 "pavgb %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
1741 "xor %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1742 "and %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1743 "psubb %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1744 "xor %[ftmp8], %[ftmp3], %[ftmp1] \n\t"
1745 "pavgb %[ftmp7], %[ftmp3], %[ftmp1] \n\t"
1746 "and %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1747 "ldc1 %[ftmp12], 0x40+%[stack] \n\t"
1748 "psubb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1749 "ldc1 %[ftmp13], 0x20+%[stack] \n\t"
1750 "pavgb %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
1751 "xor %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1752 "xor %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1753 "and %[ftmp6], %[ftmp6], %[ftmp12] \n\t"
1754 "and %[ftmp7], %[ftmp7], %[ftmp13] \n\t"
1755 "xor %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1756 "xor %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
/* Store row pix, then load row pix + 3*stride. */
1757 "sdc1 %[ftmp6], 0x00(%[pix]) \n\t"
1758 "gsldxc1 %[ftmp6], 0x00(%[pix], %[addr1]) \n\t"
1759 "paddb %[ftmp7], %[ftmp14], %[ftmp6] \n\t"
1760 "pavgb %[ftmp6], %[ftmp6], %[ftmp14] \n\t"
1761 "ldc1 %[ftmp12], 0x00+%[stack] \n\t"
1762 "pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1763 "paddb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1764 "paddb %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
1765 "psrlh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
1766 "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1767 "xor %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1768 "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1769 "ldc1 %[ftmp12], 0x40+%[stack] \n\t"
1770 "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1771 "xor %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1772 "xor %[ftmp6], %[ftmp6], %[ftmp14] \n\t"
1773 "and %[ftmp5], %[ftmp5], %[ftmp12] \n\t"
1774 "and %[ftmp6], %[ftmp6], %[ftmp12] \n\t"
1775 "xor %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1776 "xor %[ftmp6], %[ftmp6], %[ftmp14] \n\t"
/* Store rows pix + stride and pix + 2*stride. */
1777 "gssdxc1 %[ftmp5], 0x00(%[pix], %[stride]) \n\t"
1778 "gssdxc1 %[ftmp6], 0x00(%[pix], %[addr2]) \n\t"
/* Early-exit target for the alpha/beta checks at the top. */
1779 "1: \n\t"
1780  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1781  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1782  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1783  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1784  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1785  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
1786  [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
1787  [ftmp14]"=&f"(ftmp[14]), [ftmp15]"=&f"(ftmp[15]),
1788  [tmp0]"=&r"(tmp[0]),
1789  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
1790  [addr2]"=&r"(addr[2]),
/* alpha and beta are read-write operands because the asm decrements them. */
1791  [alpha]"+&r"(alpha), [beta]"+&r"(beta)
1792  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
1793  [stack]"m"(stack[0]), [ff_pb_1]"m"(ff_pb_1)
1794 : "memory"
1795 );
1796 }
1797 
1798 void ff_deblock_v_chroma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
1799  int8_t *tc0)
1800 {
1801  double ftmp[9];
1802  mips_reg addr[1];
1803  uint64_t low32;
1804 
1805  __asm__ volatile (
1806  "addi %[alpha], %[alpha], -0x01 \n\t"
1807  "addi %[beta], %[beta], -0x01 \n\t"
1808  "or %[addr0], $0, %[pix] \n\t"
1809  PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
1810  PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
1811  "ldc1 %[ftmp1], 0x00(%[addr0]) \n\t"
1812  "gsldxc1 %[ftmp2], 0x00(%[addr0], %[stride]) \n\t"
1813  "ldc1 %[ftmp3], 0x00(%[pix]) \n\t"
1814  "gsldxc1 %[ftmp4], 0x00(%[pix], %[stride]) \n\t"
1815 
1816  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1817  "mtc1 %[alpha], %[ftmp5] \n\t"
1818  "mtc1 %[beta], %[ftmp6] \n\t"
1819  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1820  "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1821  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1822  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1823  "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t"
1824  "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1825  "or %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1826  "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1827  "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
1828  "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1829  "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1830  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1831  "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1832  "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1833  "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t"
1834  "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1835  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1836  "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1837  "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1838  "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1839  "uld %[low32], 0x00(%[tc0]) \n\t"
1840  "mtc1 %[low32], %[ftmp7] \n\t"
1841  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1842  "and %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1843  "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1844  "xor %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1845  "xor %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1846  "and %[ftmp6], %[ftmp6], %[ff_pb_1] \n\t"
1847  "pavgb %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
1848  "xor %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
1849  "pavgb %[ftmp4], %[ftmp4], %[ff_pb_3] \n\t"
1850  "pavgb %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
1851  "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1852  "paddusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1853  "psubusb %[ftmp7], %[ff_pb_A1], %[ftmp4] \n\t"
1854  "psubusb %[ftmp4], %[ftmp4], %[ff_pb_A1] \n\t"
1855  "pminub %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1856  "pminub %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
1857  "psubusb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1858  "psubusb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1859  "paddusb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1860  "paddusb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1861 
1862  "gssdxc1 %[ftmp2], 0x00(%[addr0], %[stride]) \n\t"
1863  "sdc1 %[ftmp3], 0x00(%[pix]) \n\t"
1864  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1865  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1866  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1867  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1868  [ftmp8]"=&f"(ftmp[8]),
1869  [addr0]"=&r"(addr[0]),
1870  [low32]"=&r"(low32)
1871  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
1872  [alpha]"r"(alpha), [beta]"r"(beta),
1873  [tc0]"r"(tc0), [ff_pb_1]"f"(ff_pb_1),
1874  [ff_pb_3]"f"(ff_pb_3), [ff_pb_A1]"f"(ff_pb_A1)
1875  : "memory"
1876  );
1877 }
1878 
1880  int beta)
1881 {
/*
 * Loongson MMI inline-assembly filter for one 8-byte-wide horizontal edge,
 * without a tc0 clipping table.
 *
 * NOTE(review): the first line of this function's signature is absent from
 * this listing; the body uses pix, stride, alpha and beta.
 *
 * Rows read:    pix - 2*stride, pix - stride, pix, pix + stride.
 * Rows written: pix - stride and pix. A byte is replaced only in lanes where
 * the mask in ftmp8 is set; other lanes keep their original value.
 *
 * NOTE(review): alpha and beta are decremented by the asm but are listed as
 * input-only "r" operands below; another asm block in this file declares
 * them "+&r" for the same pattern - confirm and align.
 */
1882  double ftmp[9];
1883  mips_reg addr[1];
1884 
1885  __asm__ volatile (
1886  "addi %[alpha], %[alpha], -0x01 \n\t"
1887  "addi %[beta], %[beta], -0x01 \n\t"
/* addr0 = pix - 2*stride */
1888  "or %[addr0], $0, %[pix] \n\t"
1889  PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
1890  PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
/* ftmp1..ftmp4 = rows pix-2*stride, pix-stride, pix, pix+stride */
1891  "ldc1 %[ftmp1], 0x00(%[addr0]) \n\t"
1892  "gsldxc1 %[ftmp2], 0x00(%[addr0], %[stride]) \n\t"
1893  "ldc1 %[ftmp3], 0x00(%[pix]) \n\t"
1894  "gsldxc1 %[ftmp4], 0x00(%[pix], %[stride]) \n\t"
1895 
/* Build the per-byte mask in ftmp8 from the (alpha - 1) / (beta - 1)
 * thresholds and the three absolute row differences. */
1896  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1897  "mtc1 %[alpha], %[ftmp5] \n\t"
1898  "mtc1 %[beta], %[ftmp6] \n\t"
1899  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1900  "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1901  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1902  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1903  "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t"
1904  "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1905  "or %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1906  "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1907  "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
1908  "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1909  "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1910  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1911  "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1912  "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1913  "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t"
1914  "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1915  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1916  "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1917  "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1918  "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
/* Keep the unfiltered edge rows in ftmp6 / ftmp7 for the masked merge. */
1919  "mov.d %[ftmp6], %[ftmp2] \n\t"
1920  "mov.d %[ftmp7], %[ftmp3] \n\t"
/* New value for each edge row, built from chained pavgb averages with a
 * low-bit correction taken from the xor / ff_pb_1 term. */
1921  "xor %[ftmp5], %[ftmp2], %[ftmp4] \n\t"
1922  "and %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t"
1923  "pavgb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1924  "psubusb %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
1925  "pavgb %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1926  "xor %[ftmp5], %[ftmp3], %[ftmp1] \n\t"
1927  "and %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t"
1928  "pavgb %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
1929  "psubusb %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1930  "pavgb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
/* Masked merge: result = original + ((filtered - original) & mask). */
1931  "psubb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1932  "psubb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1933  "and %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
1934  "and %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
1935  "paddb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1936  "paddb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1937 
/* Write back rows pix - stride and pix. */
1938  "gssdxc1 %[ftmp2], 0x00(%[addr0], %[stride]) \n\t"
1939  "sdc1 %[ftmp3], 0x00(%[pix]) \n\t"
1940  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1941  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1942  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1943  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1944  [ftmp8]"=&f"(ftmp[8]),
1945  [addr0]"=&r"(addr[0])
1946  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
1947  [alpha]"r"(alpha), [beta]"r"(beta),
1948  [ff_pb_1]"f"(ff_pb_1)
1949  : "memory"
1950  );
1951 }
1952 
1953 void ff_deblock_h_chroma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
1954  int8_t *tc0)
1955 {
1956  double ftmp[11];
1957  mips_reg addr[6];
1958  uint64_t low32;
1959 
1960  __asm__ volatile (
1961  "addi %[alpha], %[alpha], -0x01 \n\t"
1962  "addi %[beta], %[beta], -0x01 \n\t"
1963  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
1964  PTR_ADDI "%[pix], %[pix], -0x02 \n\t"
1965  PTR_ADDU "%[addr1], %[addr0], %[stride] \n\t"
1966  PTR_ADDU "%[addr2], %[addr0], %[addr0] \n\t"
1967  "or %[addr5], $0, %[pix] \n\t"
1968  PTR_ADDU "%[pix], %[pix], %[addr1] \n\t"
1969  "uld %[low32], 0x00(%[addr5]) \n\t"
1970  "mtc1 %[low32], %[ftmp0] \n\t"
1971  PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t"
1972  "uld %[low32], 0x00(%[addr3]) \n\t"
1973  "mtc1 %[low32], %[ftmp2] \n\t"
1974  PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t"
1975  "uld %[low32], 0x00(%[addr4]) \n\t"
1976  "mtc1 %[low32], %[ftmp1] \n\t"
1977  "uld %[low32], 0x00(%[pix]) \n\t"
1978  "mtc1 %[low32], %[ftmp3] \n\t"
1979  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
1980  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1981  PTR_ADDU "%[addr3], %[pix], %[stride] \n\t"
1982  "punpckhhw %[ftmp2], %[ftmp0], %[ftmp1] \n\t"
1983  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1984  "uld %[low32], 0x00(%[addr3]) \n\t"
1985  "mtc1 %[low32], %[ftmp4] \n\t"
1986  PTR_ADDU "%[addr4], %[pix], %[addr0] \n\t"
1987  "uld %[low32], 0x00(%[addr4]) \n\t"
1988  "mtc1 %[low32], %[ftmp6] \n\t"
1989  PTR_ADDU "%[addr3], %[pix], %[addr1] \n\t"
1990  "uld %[low32], 0x00(%[addr3]) \n\t"
1991  "mtc1 %[low32], %[ftmp5] \n\t"
1992  PTR_ADDU "%[addr4], %[pix], %[addr2] \n\t"
1993  "uld %[low32], 0x00(%[addr4]) \n\t"
1994  "mtc1 %[low32], %[ftmp7] \n\t"
1995  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1996  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1997  "mov.d %[ftmp6], %[ftmp4] \n\t"
1998  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1999  "punpckhhw %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
2000  "punpckhwd %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
2001  "punpckhwd %[ftmp3], %[ftmp2], %[ftmp6] \n\t"
2002  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2003  "punpcklwd %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2004  "mov.d %[ftmp9], %[ftmp0] \n\t"
2005  "mov.d %[ftmp10], %[ftmp3] \n\t"
2006 
2007  "xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
2008  "mtc1 %[alpha], %[ftmp4] \n\t"
2009  "mtc1 %[beta], %[ftmp5] \n\t"
2010  "pshufh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
2011  "pshufh %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
2012  "packushb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2013  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2014  "psubusb %[ftmp6], %[ftmp2], %[ftmp1] \n\t"
2015  "psubusb %[ftmp7], %[ftmp1], %[ftmp2] \n\t"
2016  "or %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
2017  "psubusb %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2018  "psubusb %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
2019  "psubusb %[ftmp4], %[ftmp0], %[ftmp1] \n\t"
2020  "or %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2021  "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2022  "or %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2023  "psubusb %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
2024  "psubusb %[ftmp4], %[ftmp3], %[ftmp2] \n\t"
2025  "or %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2026  "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2027  "or %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2028  "xor %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
2029  "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
2030  "uld %[low32], 0x00(%[tc0]) \n\t"
2031  "mtc1 %[low32], %[ftmp6] \n\t"
2032  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
2033  "and %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
2034  "pcmpeqb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2035  "xor %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
2036  "xor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
2037  "and %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t"
2038  "pavgb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
2039  "xor %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
2040  "pavgb %[ftmp3], %[ftmp3], %[ff_pb_3] \n\t"
2041  "pavgb %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
2042  "pavgb %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
2043  "paddusb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
2044  "psubusb %[ftmp6], %[ff_pb_A1], %[ftmp3] \n\t"
2045  "psubusb %[ftmp3], %[ftmp3], %[ff_pb_A1] \n\t"
2046  "pminub %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
2047  "pminub %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
2048  "psubusb %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
2049  "psubusb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2050  "paddusb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
2051  "paddusb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2052 
2053  "punpckhwd %[ftmp4], %[ftmp9], %[ftmp9] \n\t"
2054  "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t"
2055  "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t"
2056  "punpcklbh %[ftmp0], %[ftmp9], %[ftmp1] \n\t"
2057  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
2058  "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2059  "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2060  "gsswlc1 %[ftmp1], 0x03(%[addr5]) \n\t"
2061  "gsswrc1 %[ftmp1], 0x00(%[addr5]) \n\t"
2062  PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t"
2063  "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
2064  "gsswlc1 %[ftmp1], 0x03(%[addr3]) \n\t"
2065  PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t"
2066  "gsswrc1 %[ftmp1], 0x00(%[addr3]) \n\t"
2067  "gsswlc1 %[ftmp0], 0x03(%[addr4]) \n\t"
2068  "gsswrc1 %[ftmp0], 0x00(%[addr4]) \n\t"
2069  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2070  "punpckhwd %[ftmp3], %[ftmp10], %[ftmp10] \n\t"
2071  "gsswlc1 %[ftmp0], 0x03(%[pix]) \n\t"
2072  "gsswrc1 %[ftmp0], 0x00(%[pix]) \n\t"
2073  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2074  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
2075  PTR_ADDU "%[addr3], %[pix], %[stride] \n\t"
2076  "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t"
2077  "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2078  "gsswlc1 %[ftmp5], 0x03(%[addr3]) \n\t"
2079  "gsswrc1 %[ftmp5], 0x00(%[addr3]) \n\t"
2080  "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2081  PTR_ADDU "%[addr3], %[pix], %[addr0] \n\t"
2082  PTR_ADDU "%[addr4], %[pix], %[addr1] \n\t"
2083  "gsswlc1 %[ftmp5], 0x03(%[addr3]) \n\t"
2084  "gsswrc1 %[ftmp5], 0x00(%[addr3]) \n\t"
2085  "gsswlc1 %[ftmp4], 0x03(%[addr4]) \n\t"
2086  PTR_ADDU "%[addr3], %[pix], %[addr2] \n\t"
2087  "punpckhwd %[ftmp9], %[ftmp4], %[ftmp4] \n\t"
2088  "gsswrc1 %[ftmp4], 0x00(%[addr4]) \n\t"
2089  "gsswlc1 %[ftmp9], 0x03(%[addr3]) \n\t"
2090  "gsswrc1 %[ftmp9], 0x00(%[addr3]) \n\t"
2091  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2092  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2093  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2094  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2095  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
2096  [ftmp10]"=&f"(ftmp[10]),
2097  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2098  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2099  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2100  [pix]"+&r"(pix),
2101  [low32]"=&r"(low32)
2102  : [alpha]"r"(alpha), [beta]"r"(beta),
2103  [stride]"r"((mips_reg)stride), [tc0]"r"(tc0),
2104  [ff_pb_1]"f"(ff_pb_1), [ff_pb_3]"f"(ff_pb_3),
2105  [ff_pb_A1]"f"(ff_pb_A1)
2106  : "memory"
2107  );
2108 }
2109 
2111  int beta)
2112 {
/*
 * Loongson MMI inline-assembly filter for one vertical edge across eight
 * rows, without a tc0 clipping table.
 *
 * NOTE(review): the first line of this function's signature is absent from
 * this listing; the body uses pix, stride, alpha and beta.
 *
 * Eight rows are loaded starting two bytes left of pix, transposed into the
 * column vectors ftmp0..ftmp3, the two middle columns (ftmp1, ftmp2) are
 * filtered under the mask in ftmp7, and the block is transposed back and
 * stored with gsswlc1/gsswrc1 pairs (presumably unaligned 4-byte stores).
 *
 * NOTE(review): alpha and beta are decremented by the asm but are listed as
 * input-only "r" operands below - confirm and align with the "+&r" pattern
 * used for pix.
 * NOTE(review): each uld fetches 8 bytes although mtc1 only transfers the
 * low 32 bits - confirm the extra bytes are always readable.
 * NOTE(review): the ftmp10 operand is declared but not referenced by the asm
 * template.
 */
2113  double ftmp[11];
2114  mips_reg addr[6];
2115  uint64_t low32;
2116 
2117  __asm__ volatile (
2118  "addi %[alpha], %[alpha], -0x01 \n\t"
2119  "addi %[beta], %[beta], -0x01 \n\t"
/* addr0 = 2*stride, addr1 = 3*stride, addr2 = 4*stride,
 * addr5 = original pix - 2, pix = addr5 + 3*stride */
2120  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2121  PTR_ADDI "%[pix], %[pix], -0x02 \n\t"
2122  PTR_ADDU "%[addr1], %[addr0], %[stride] \n\t"
2123  PTR_ADDU "%[addr2], %[addr0], %[addr0] \n\t"
2124  "or %[addr5], $0, %[pix] \n\t"
2125  PTR_ADDU "%[pix], %[pix], %[addr1] \n\t"
/* Load rows 0..3 and start the transpose. */
2126  "uld %[low32], 0x00(%[addr5]) \n\t"
2127  "mtc1 %[low32], %[ftmp0] \n\t"
2128  PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t"
2129  "uld %[low32], 0x00(%[addr3]) \n\t"
2130  "mtc1 %[low32], %[ftmp2] \n\t"
2131  PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t"
2132  "uld %[low32], 0x00(%[addr4]) \n\t"
2133  "mtc1 %[low32], %[ftmp1] \n\t"
2134  "uld %[low32], 0x00(%[pix]) \n\t"
2135  "mtc1 %[low32], %[ftmp3] \n\t"
2136  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2137  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
2138  PTR_ADDU "%[addr3], %[pix], %[stride] \n\t"
2139  "punpckhhw %[ftmp2], %[ftmp0], %[ftmp1] \n\t"
2140  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
/* Load rows 4..7 and finish the transpose into ftmp0..ftmp3. */
2141  "uld %[low32], 0x00(%[addr3]) \n\t"
2142  "mtc1 %[low32], %[ftmp4] \n\t"
2143  PTR_ADDU "%[addr4], %[pix], %[addr0] \n\t"
2144  "uld %[low32], 0x00(%[addr4]) \n\t"
2145  "mtc1 %[low32], %[ftmp6] \n\t"
2146  PTR_ADDU "%[addr3], %[pix], %[addr1] \n\t"
2147  "uld %[low32], 0x00(%[addr3]) \n\t"
2148  "mtc1 %[low32], %[ftmp5] \n\t"
2149  PTR_ADDU "%[addr4], %[pix], %[addr2] \n\t"
2150  "uld %[low32], 0x00(%[addr4]) \n\t"
2151  "mtc1 %[low32], %[ftmp7] \n\t"
2152  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2153  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
2154  "mov.d %[ftmp6], %[ftmp4] \n\t"
2155  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2156  "punpckhhw %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
2157  "punpckhwd %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
2158  "punpckhwd %[ftmp3], %[ftmp2], %[ftmp6] \n\t"
2159  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2160  "punpcklwd %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2161 
/* Build the per-byte mask in ftmp7 from the (alpha - 1) / (beta - 1)
 * thresholds and the three absolute column differences. */
2162  "xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
2163  "mtc1 %[alpha], %[ftmp4] \n\t"
2164  "mtc1 %[beta], %[ftmp5] \n\t"
2165  "pshufh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
2166  "pshufh %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
2167  "packushb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2168  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2169  "psubusb %[ftmp6], %[ftmp2], %[ftmp1] \n\t"
2170  "psubusb %[ftmp7], %[ftmp1], %[ftmp2] \n\t"
2171  "or %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
2172  "psubusb %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2173  "psubusb %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
2174  "psubusb %[ftmp4], %[ftmp0], %[ftmp1] \n\t"
2175  "or %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2176  "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2177  "or %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2178  "psubusb %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
2179  "psubusb %[ftmp4], %[ftmp3], %[ftmp2] \n\t"
2180  "or %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2181  "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2182  "or %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2183  "xor %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
2184  "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
/* Keep the unfiltered middle columns in ftmp5 / ftmp6 for the masked merge. */
2185  "mov.d %[ftmp5], %[ftmp1] \n\t"
2186  "mov.d %[ftmp6], %[ftmp2] \n\t"
2187  "xor %[ftmp4], %[ftmp1], %[ftmp3] \n\t"
2188  "and %[ftmp4], %[ftmp4], %[ff_pb_1] \n\t"
2189  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
2190  "psubusb %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
2191  "pavgb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
2192  "xor %[ftmp4], %[ftmp2], %[ftmp0] \n\t"
2193  "and %[ftmp4], %[ftmp4], %[ff_pb_1] \n\t"
2194  "pavgb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
2195  "psubusb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
2196  "pavgb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
/* Masked merge: result = original + ((filtered - original) & mask). */
2197  "psubb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
2198  "psubb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2199  "and %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
2200  "and %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
2201  "paddb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
2202  "paddb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2203 
/* Transpose back and store to the same eight row addresses. */
2204  "punpckhwd %[ftmp4], %[ftmp0], %[ftmp0] \n\t"
2205  "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t"
2206  "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t"
2207  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2208  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2209  "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2210  "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2211  "gsswlc1 %[ftmp1], 0x03(%[addr5]) \n\t"
2212  "gsswrc1 %[ftmp1], 0x00(%[addr5]) \n\t"
2213  PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t"
2214  "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
2215  "gsswlc1 %[ftmp1], 0x03(%[addr3]) \n\t"
2216  PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t"
2217  "gsswrc1 %[ftmp1], 0x00(%[addr3]) \n\t"
2218  "gsswlc1 %[ftmp0], 0x03(%[addr4]) \n\t"
2219  "gsswrc1 %[ftmp0], 0x00(%[addr4]) \n\t"
2220  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2221  "punpckhwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
2222  "gsswlc1 %[ftmp0], 0x03(%[pix]) \n\t"
2223  "gsswrc1 %[ftmp0], 0x00(%[pix]) \n\t"
2224  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2225  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
2226  PTR_ADDU "%[addr3], %[pix], %[stride] \n\t"
2227  "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t"
2228  "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2229  "gsswlc1 %[ftmp5], 0x03(%[addr3]) \n\t"
2230  "gsswrc1 %[ftmp5], 0x00(%[addr3]) \n\t"
2231  "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2232  PTR_ADDU "%[addr3], %[pix], %[addr0] \n\t"
2233  PTR_ADDU "%[addr4], %[pix], %[addr1] \n\t"
2234  "gsswlc1 %[ftmp5], 0x03(%[addr3]) \n\t"
2235  "gsswrc1 %[ftmp5], 0x00(%[addr3]) \n\t"
2236  "gsswlc1 %[ftmp4], 0x03(%[addr4]) \n\t"
2237  PTR_ADDU "%[addr3], %[pix], %[addr2] \n\t"
2238  "punpckhwd %[ftmp9], %[ftmp4], %[ftmp4] \n\t"
2239  "gsswrc1 %[ftmp4], 0x00(%[addr4]) \n\t"
2240  "gsswlc1 %[ftmp9], 0x03(%[addr3]) \n\t"
2241  "gsswrc1 %[ftmp9], 0x00(%[addr3]) \n\t"
2242  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2243  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2244  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2245  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2246  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
2247  [ftmp10]"=&f"(ftmp[10]),
2248  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2249  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2250  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
/* pix is a read-write operand because the asm adjusts it in place. */
2251  [pix]"+&r"(pix),
2252  [low32]"=&r"(low32)
2253  : [alpha]"r"(alpha), [beta]"r"(beta),
2254  [stride]"r"((mips_reg)stride), [ff_pb_1]"f"(ff_pb_1)
2255  : "memory"
2256  );
2257 }
2258 
/**
 * Deblock a 16-pixel-wide horizontal luma edge as two 8-pixel halves.
 *
 * Each half owns two consecutive tc0 entries. A half is skipped when both of
 * its entries are negative: the bitwise AND keeps the sign bit only if both
 * operands have it set.
 *
 * @param pix    pointer to the first pixel of the edge row
 * @param stride distance in bytes between rows
 * @param alpha  threshold forwarded unchanged to the 8-pixel filter
 * @param beta   threshold forwarded unchanged to the 8-pixel filter
 * @param tc0    four clipping values, two per 8-pixel half
 */
void ff_deblock_v_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
        int8_t *tc0)
{
    int half;

    for (half = 0; half < 2; half++) {
        int8_t *tc = tc0 + 2 * half;

        if ((tc[0] & tc[1]) < 0)
            continue;
        ff_deblock_v8_luma_8_mmi(pix + 8 * half, stride, alpha, beta, tc);
    }
}
2267 
2269  int beta)
2270 {
/*
 * Applies the 8-pixel intra luma filter twice: once at pix and once at
 * pix + 8, with the same stride, alpha and beta for both calls.
 *
 * NOTE(review): the first line of this function's signature is absent from
 * this listing; the body uses pix, stride, alpha and beta.
 */
2271  deblock_v8_luma_intra_8_mmi(pix + 0, stride, alpha, beta);
2272  deblock_v8_luma_intra_8_mmi(pix + 8, stride, alpha, beta);
2273 }
2274 
/**
 * Luma deblocking across a vertical edge, implemented on top of
 * ff_deblock_v_luma_8_mmi().
 *
 * Three steps:
 *  1. Read 8 bytes starting at pix - 4 from each of 16 picture rows and
 *     rearrange them with the punpck* cascade into six 16-byte rows in the
 *     local buffer `stack` (byte offsets 0x00 .. 0x5f, row pitch 0x10).
 *  2. Call ff_deblock_v_luma_8_mmi() on &stack[6] (byte offset 0x30) with a
 *     stride of 0x10; alpha, beta and tc0 are passed through unchanged.
 *  3. Read buffer rows 0x10 .. 0x40 back, interleave them again and store
 *     4 bytes at pix - 2 into each of the 16 picture rows.
 *
 * @param pix    pixel on the first row, immediately right of the edge
 * @param stride distance in bytes between picture rows
 * @param alpha  forwarded to ff_deblock_v_luma_8_mmi()
 * @param beta   forwarded to ff_deblock_v_luma_8_mmi()
 * @param tc0    forwarded to ff_deblock_v_luma_8_mmi()
 */
2275 void ff_deblock_h_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
2276  int8_t *tc0)
2277 {
     /* 13 * 8 = 104 bytes; the asm below touches offsets 0x00 .. 0x5f. */
2278  uint64_t stack[0xd];
2279  double ftmp[9];
2280  mips_reg addr[8];
2281 
2282  __asm__ volatile (
     /* addr0 = 2 * stride, addr1 = pix - 4, addr2 = 3 * stride.
      * Each gsldlc1/gsldrc1 pair reads the 8 bytes at its address
      * (NOTE(review): Loongson unaligned doubleword load - confirm against
      * the ISA manual). */
2283  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2284  PTR_ADDI "%[addr1], %[pix], -0x4 \n\t"
2285  PTR_ADDU "%[addr2], %[stride], %[addr0] \n\t"
2286  "gsldlc1 %[ftmp0], 0x07(%[addr1]) \n\t"
2287  "gsldrc1 %[ftmp0], 0x00(%[addr1]) \n\t"
2288  PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t"
2289  PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t"
2290  "gsldlc1 %[ftmp1], 0x07(%[addr3]) \n\t"
2291  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2292  "gsldrc1 %[ftmp1], 0x00(%[addr3]) \n\t"
2293  "gsldlc1 %[ftmp2], 0x07(%[addr5]) \n\t"
2294  "gsldrc1 %[ftmp2], 0x00(%[addr5]) \n\t"
2295  "gsldlc1 %[ftmp3], 0x07(%[addr4]) \n\t"
2296  PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t"
2297  "gsldrc1 %[ftmp3], 0x00(%[addr4]) \n\t"
2298  "gsldlc1 %[ftmp4], 0x07(%[addr3]) \n\t"
2299  PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t"
2300  "gsldrc1 %[ftmp4], 0x00(%[addr3]) \n\t"
2301  "gsldlc1 %[ftmp5], 0x07(%[addr5]) \n\t"
2302  PTR_ADDU "%[addr3], %[addr4], %[addr2] \n\t"
2303  "gsldrc1 %[ftmp5], 0x00(%[addr5]) \n\t"
2304  "gsldlc1 %[ftmp6], 0x07(%[addr3]) \n\t"
2305  "gsldrc1 %[ftmp6], 0x00(%[addr3]) \n\t"
     /* addr6 = 4 * stride */
2306  PTR_ADDU "%[addr6], %[addr0], %[addr0] \n\t"
2307  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2308  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2309  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2310  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2311  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2312  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2313  PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t"
     /* 0x10(stack) is used as a spill slot here; it is reloaded into ftmp8
      * below and later overwritten with a final buffer row. */
2314  "sdc1 %[ftmp1], 0x10(%[stack]) \n\t"
2315  "gsldlc1 %[ftmp8], 0x07(%[addr3]) \n\t"
2316  "gsldrc1 %[ftmp8], 0x00(%[addr3]) \n\t"
     /* addr7 = 8 * stride */
2317  PTR_ADDU "%[addr7], %[addr6], %[addr6] \n\t"
2318  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2319  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
2320  "punpckhhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2321  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2322  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2323  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2324  "ldc1 %[ftmp8], 0x10(%[stack]) \n\t"
2325  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2326  "sdc1 %[ftmp0], 0x00(%[stack]) \n\t"
2327  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp8] \n\t"
2328  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
2329  "punpckhhw %[ftmp0], %[ftmp3], %[ftmp5] \n\t"
2330  "punpcklhw %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
2331  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
2332  "punpckhwd %[ftmp5], %[ftmp7], %[ftmp3] \n\t"
2333  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
2334  "punpckhwd %[ftmp3], %[ftmp1], %[ftmp2] \n\t"
2335  "punpcklwd %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
     /* Low 8 bytes of buffer rows 0x10 .. 0x50 (row 0x00 was stored above). */
2336  "sdc1 %[ftmp1], 0x10(%[stack]) \n\t"
2337  "sdc1 %[ftmp3], 0x20(%[stack]) \n\t"
2338  "sdc1 %[ftmp7], 0x30(%[stack]) \n\t"
2339  "sdc1 %[ftmp5], 0x40(%[stack]) \n\t"
2340  "sdc1 %[ftmp6], 0x50(%[stack]) \n\t"
     /* Advance both row pointers by 8 * stride and repeat the sequence for
      * the next eight picture rows; results go to the high 8 bytes of each
      * buffer row (offsets 0x08, 0x18, ... 0x58). */
2341  PTR_ADDU "%[addr1], %[addr1], %[addr7] \n\t"
2342  PTR_ADDU "%[addr4], %[addr4], %[addr7] \n\t"
2343  "gsldlc1 %[ftmp0], 0x07(%[addr1]) \n\t"
2344  PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t"
2345  "gsldrc1 %[ftmp0], 0x00(%[addr1]) \n\t"
2346  "gsldlc1 %[ftmp1], 0x07(%[addr3]) \n\t"
2347  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2348  "gsldrc1 %[ftmp1], 0x00(%[addr3]) \n\t"
2349  "gsldlc1 %[ftmp2], 0x07(%[addr5]) \n\t"
2350  "gsldrc1 %[ftmp2], 0x00(%[addr5]) \n\t"
2351  "gsldlc1 %[ftmp3], 0x07(%[addr4]) \n\t"
2352  PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t"
2353  "gsldrc1 %[ftmp3], 0x00(%[addr4]) \n\t"
2354  "gsldlc1 %[ftmp4], 0x07(%[addr3]) \n\t"
2355  PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t"
2356  "gsldrc1 %[ftmp4], 0x00(%[addr3]) \n\t"
2357  "gsldlc1 %[ftmp5], 0x07(%[addr5]) \n\t"
2358  PTR_ADDU "%[addr3], %[addr4], %[addr2] \n\t"
2359  "gsldrc1 %[ftmp5], 0x00(%[addr5]) \n\t"
2360  "gsldlc1 %[ftmp6], 0x07(%[addr3]) \n\t"
2361  "gsldrc1 %[ftmp6], 0x00(%[addr3]) \n\t"
2362  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2363  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2364  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2365  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2366  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2367  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2368  PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t"
     /* Spill slot for this half is 0x18(stack). */
2369  "sdc1 %[ftmp1], 0x18(%[stack]) \n\t"
2370  "gsldlc1 %[ftmp8], 0x07(%[addr3]) \n\t"
2371  "gsldrc1 %[ftmp8], 0x00(%[addr3]) \n\t"
2372  "punpckhhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2373  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2374  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
2375  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2376  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2377  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2378  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2379  "ldc1 %[ftmp8], 0x18(%[stack]) \n\t"
2380  "sdc1 %[ftmp0], 0x08(%[stack]) \n\t"
2381  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp8] \n\t"
2382  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
2383  "punpckhhw %[ftmp0], %[ftmp3], %[ftmp5] \n\t"
2384  "punpcklhw %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
2385  "punpckhwd %[ftmp5], %[ftmp7], %[ftmp3] \n\t"
2386  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
2387  "punpckhwd %[ftmp3], %[ftmp1], %[ftmp2] \n\t"
2388  "punpcklwd %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
2389  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
2390  "sdc1 %[ftmp1], 0x18(%[stack]) \n\t"
2391  "sdc1 %[ftmp3], 0x28(%[stack]) \n\t"
2392  "sdc1 %[ftmp7], 0x38(%[stack]) \n\t"
2393  "sdc1 %[ftmp5], 0x48(%[stack]) \n\t"
2394  "sdc1 %[ftmp6], 0x58(%[stack]) \n\t"
2395  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2396  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2397  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2398  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2399  [ftmp8]"=&f"(ftmp[8]),
2400  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2401  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2402  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2403  [addr6]"=&r"(addr[6]), [addr7]"=&r"(addr[7])
2404  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
2405  [stack]"r"(stack)
2406  : "memory"
2407  );
2408 
     /* Filter the rearranged copy in place: &stack[6] is byte offset 0x30 of
      * the buffer and consecutive buffer rows are 0x10 bytes apart. */
2409  ff_deblock_v_luma_8_mmi((uint8_t *) &stack[6], 0x10, alpha, beta, tc0);
2410 
2411  __asm__ volatile (
     /* addr0 = 2 * stride, addr1 = pix - 2, addr6 = 4 * stride,
      * addr2 = 3 * stride, addr7 = 8 * stride, addr4 = addr1 + 3 * stride.
      * Only buffer rows 0x10 .. 0x40 are read back; each gsswlc1/gsswrc1
      * pair stores 4 bytes to one picture row. */
2412  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2413  PTR_ADDI "%[addr1], %[pix], -0x02 \n\t"
2414  PTR_ADDU "%[addr6], %[addr0], %[addr0] \n\t"
2415  PTR_ADDU "%[addr2], %[addr0], %[stride] \n\t"
2416  PTR_ADDU "%[addr7], %[addr6], %[addr6] \n\t"
2417  PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t"
2418  "ldc1 %[ftmp0], 0x10(%[stack]) \n\t"
2419  "ldc1 %[ftmp1], 0x20(%[stack]) \n\t"
2420  "ldc1 %[ftmp2], 0x30(%[stack]) \n\t"
2421  "ldc1 %[ftmp3], 0x40(%[stack]) \n\t"
2422  "punpckhwd %[ftmp4], %[ftmp0], %[ftmp0] \n\t"
2423  "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t"
2424  "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t"
2425  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2426  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2427  "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2428  "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2429  "gsswlc1 %[ftmp1], 0x03(%[addr1]) \n\t"
2430  "gsswrc1 %[ftmp1], 0x00(%[addr1]) \n\t"
2431  PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t"
2432  "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
2433  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2434  "gsswlc1 %[ftmp1], 0x03(%[addr3]) \n\t"
2435  "gsswrc1 %[ftmp1], 0x00(%[addr3]) \n\t"
2436  "gsswlc1 %[ftmp0], 0x03(%[addr5]) \n\t"
2437  "gsswrc1 %[ftmp0], 0x00(%[addr5]) \n\t"
2438  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2439  "punpckhwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
2440  "gsswlc1 %[ftmp0], 0x03(%[addr4]) \n\t"
2441  "gsswrc1 %[ftmp0], 0x00(%[addr4]) \n\t"
2442  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2443  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
2444  "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t"
2445  PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t"
2446  "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2447  "gsswlc1 %[ftmp5], 0x03(%[addr3]) \n\t"
2448  "gsswrc1 %[ftmp5], 0x00(%[addr3]) \n\t"
2449  PTR_ADDU "%[addr3], %[addr4], %[addr0] \n\t"
2450  "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2451  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2452  "gsswlc1 %[ftmp5], 0x03(%[addr3]) \n\t"
2453  "gsswrc1 %[ftmp5], 0x00(%[addr3]) \n\t"
2454  "gsswlc1 %[ftmp4], 0x03(%[addr5]) \n\t"
2455  "gsswrc1 %[ftmp4], 0x00(%[addr5]) \n\t"
2456  PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t"
2457  "punpckhwd %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
     /* Move on to the next eight picture rows; their data comes from the
      * high 8 bytes of the same buffer rows (0x18 .. 0x48). */
2458  PTR_ADDU "%[addr1], %[addr1], %[addr7] \n\t"
2459  "gsswlc1 %[ftmp4], 0x03(%[addr3]) \n\t"
2460  "gsswrc1 %[ftmp4], 0x00(%[addr3]) \n\t"
2461  PTR_ADDU "%[addr4], %[addr4], %[addr7] \n\t"
2462  "ldc1 %[ftmp0], 0x18(%[stack]) \n\t"
2463  "ldc1 %[ftmp1], 0x28(%[stack]) \n\t"
2464  "ldc1 %[ftmp2], 0x38(%[stack]) \n\t"
2465  "ldc1 %[ftmp3], 0x48(%[stack]) \n\t"
     /* addr0 and addr6 are recomputed here with the same values they
      * already hold (2 * stride and 4 * stride). */
2466  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2467  "punpckhwd %[ftmp4], %[ftmp0], %[ftmp0] \n\t"
2468  PTR_ADDU "%[addr6], %[addr0], %[addr0] \n\t"
2469  "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t"
2470  "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t"
2471  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2472  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2473  PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t"
2474  "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2475  "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2476  "gsswlc1 %[ftmp1], 0x03(%[addr1]) \n\t"
2477  "gsswrc1 %[ftmp1], 0x00(%[addr1]) \n\t"
2478  "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
2479  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2480  "gsswlc1 %[ftmp1], 0x03(%[addr3]) \n\t"
2481  "gsswrc1 %[ftmp1], 0x00(%[addr3]) \n\t"
2482  "gsswlc1 %[ftmp0], 0x03(%[addr5]) \n\t"
2483  "gsswrc1 %[ftmp0], 0x00(%[addr5]) \n\t"
2484  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2485  "punpckhwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
2486  "gsswlc1 %[ftmp0], 0x03(%[addr4]) \n\t"
2487  "gsswrc1 %[ftmp0], 0x00(%[addr4]) \n\t"
2488  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2489  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
2490  PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t"
2491  "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t"
2492  "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2493  "gsswlc1 %[ftmp5], 0x03(%[addr3]) \n\t"
2494  "gsswrc1 %[ftmp5], 0x00(%[addr3]) \n\t"
2495  PTR_ADDU "%[addr3], %[addr4], %[addr0] \n\t"
2496  "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2497  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2498  "gsswlc1 %[ftmp5], 0x03(%[addr3]) \n\t"
2499  "gsswrc1 %[ftmp5], 0x00(%[addr3]) \n\t"
2500  "gsswlc1 %[ftmp4], 0x03(%[addr5]) \n\t"
2501  "gsswrc1 %[ftmp4], 0x00(%[addr5]) \n\t"
2502  PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t"
2503  "punpckhwd %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2504  "gsswlc1 %[ftmp4], 0x03(%[addr3]) \n\t"
2505  "gsswrc1 %[ftmp4], 0x00(%[addr3]) \n\t"
     /* ftmp7 and ftmp8 are declared as outputs but are not referenced by
      * this second asm statement. */
2506  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2507  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2508  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2509  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2510  [ftmp8]"=&f"(ftmp[8]),
2511  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2512  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2513  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2514  [addr6]"=&r"(addr[6]), [addr7]"=&r"(addr[7])
2515  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
2516  [stack]"r"(stack)
2517  : "memory"
2518  );
2519 }
2520 
2522  int beta)
2523 {
2524  uint64_t ptmp[0x11];
2525  uint64_t pdat[4];
2526  double ftmp[9];
2527  mips_reg addr[7];
2528 
2529  __asm__ volatile (
2530  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2531  PTR_ADDI "%[addr1], %[pix], -0x04 \n\t"
2532  PTR_ADDU "%[addr2], %[addr0], %[stride] \n\t"
2533  PTR_ADDU "%[addr3], %[addr0], %[addr0] \n\t"
2534  PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t"
2535  PTR_ADDU "%[addr5], %[addr1], %[stride] \n\t"
2536  "gsldlc1 %[ftmp0], 0x07(%[addr1]) \n\t"
2537  "gsldrc1 %[ftmp0], 0x00(%[addr1]) \n\t"
2538  PTR_ADDU "%[addr6], %[addr1], %[addr0] \n\t"
2539  "gsldlc1 %[ftmp1], 0x07(%[addr5]) \n\t"
2540  "gsldrc1 %[ftmp1], 0x00(%[addr5]) \n\t"
2541  "gsldlc1 %[ftmp2], 0x07(%[addr6]) \n\t"
2542  "gsldrc1 %[ftmp2], 0x00(%[addr6]) \n\t"
2543  PTR_ADDU "%[addr5], %[addr4], %[stride] \n\t"
2544  "gsldlc1 %[ftmp3], 0x07(%[addr4]) \n\t"
2545  "gsldrc1 %[ftmp3], 0x00(%[addr4]) \n\t"
2546  PTR_ADDU "%[addr6], %[addr4], %[addr0] \n\t"
2547  "gsldlc1 %[ftmp4], 0x07(%[addr5]) \n\t"
2548  "gsldrc1 %[ftmp4], 0x00(%[addr5]) \n\t"
2549  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2550  "gsldlc1 %[ftmp5], 0x07(%[addr6]) \n\t"
2551  "gsldrc1 %[ftmp5], 0x00(%[addr6]) \n\t"
2552  "gsldlc1 %[ftmp6], 0x07(%[addr5]) \n\t"
2553  "gsldrc1 %[ftmp6], 0x00(%[addr5]) \n\t"
2554  PTR_ADDU "%[addr5], %[addr4], %[addr3] \n\t"
2555  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2556  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2557  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2558  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2559  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2560  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2561  "gsldlc1 %[ftmp8], 0x07(%[addr5]) \n\t"
2562  "gsldrc1 %[ftmp8], 0x00(%[addr5]) \n\t"
2563  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2564  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
2565  "sdc1 %[ftmp3], 0x00(%[ptmp]) \n\t"
2566  "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
2567  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2568  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2569  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2570  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
2571  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
2572  "sdc1 %[ftmp2], 0x20(%[ptmp]) \n\t"
2573  "ldc1 %[ftmp2], 0x00(%[ptmp]) \n\t"
2574  "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
2575  "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2576  "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
2577  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2578  "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
2579  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
2580  "sdc1 %[ftmp0], 0x00(%[ptmp]) \n\t"
2581  "sdc1 %[ftmp5], 0x10(%[ptmp]) \n\t"
2582  "sdc1 %[ftmp7], 0x40(%[ptmp]) \n\t"
2583  "sdc1 %[ftmp4], 0x50(%[ptmp]) \n\t"
2584  "ldc1 %[ftmp8], 0x20(%[ptmp]) \n\t"
2585  "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
2586  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2587  "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
2588  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
2589  PTR_ADDU "%[addr5], %[addr3], %[addr3] \n\t"
2590  "sdc1 %[ftmp3], 0x20(%[ptmp]) \n\t"
2591  "sdc1 %[ftmp0], 0x30(%[ptmp]) \n\t"
2592  "sdc1 %[ftmp6], 0x60(%[ptmp]) \n\t"
2593  "sdc1 %[ftmp5], 0x70(%[ptmp]) \n\t"
2594  PTR_ADDU "%[addr1], %[addr1], %[addr5] \n\t"
2595  PTR_ADDU "%[addr4], %[addr4], %[addr5] \n\t"
2596  PTR_ADDU "%[addr5], %[addr1], %[stride] \n\t"
2597  "gsldlc1 %[ftmp0], 0x07(%[addr1]) \n\t"
2598  "gsldrc1 %[ftmp0], 0x00(%[addr1]) \n\t"
2599  PTR_ADDU "%[addr6], %[addr1], %[addr0] \n\t"
2600  "gsldlc1 %[ftmp1], 0x07(%[addr5]) \n\t"
2601  "gsldrc1 %[ftmp1], 0x00(%[addr5]) \n\t"
2602  "gsldlc1 %[ftmp2], 0x07(%[addr6]) \n\t"
2603  "gsldrc1 %[ftmp2], 0x00(%[addr6]) \n\t"
2604  PTR_ADDU "%[addr5], %[addr4], %[stride] \n\t"
2605  "gsldlc1 %[ftmp3], 0x07(%[addr4]) \n\t"
2606  "gsldrc1 %[ftmp3], 0x00(%[addr4]) \n\t"
2607  PTR_ADDU "%[addr6], %[addr4], %[addr0] \n\t"
2608  "gsldlc1 %[ftmp4], 0x07(%[addr5]) \n\t"
2609  "gsldrc1 %[ftmp4], 0x00(%[addr5]) \n\t"
2610  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2611  "gsldlc1 %[ftmp5], 0x07(%[addr6]) \n\t"
2612  "gsldrc1 %[ftmp5], 0x00(%[addr6]) \n\t"
2613  "gsldlc1 %[ftmp6], 0x07(%[addr5]) \n\t"
2614  "gsldrc1 %[ftmp6], 0x00(%[addr5]) \n\t"
2615  PTR_ADDU "%[addr5], %[addr4], %[addr3] \n\t"
2616  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2617  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2618  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2619  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2620  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2621  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2622  "gsldlc1 %[ftmp8], 0x07(%[addr5]) \n\t"
2623  "gsldrc1 %[ftmp8], 0x00(%[addr5]) \n\t"
2624  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2625  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
2626  "sdc1 %[ftmp3], 0x08(%[ptmp]) \n\t"
2627  "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
2628  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2629  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2630  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2631  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
2632  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
2633  "sdc1 %[ftmp2], 0x28(%[ptmp]) \n\t"
2634  "ldc1 %[ftmp2], 0x08(%[ptmp]) \n\t"
2635  "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
2636  "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2637  "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
2638  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2639  "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
2640  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
2641  "sdc1 %[ftmp0], 0x08(%[ptmp]) \n\t"
2642  "sdc1 %[ftmp5], 0x18(%[ptmp]) \n\t"
2643  "sdc1 %[ftmp7], 0x48(%[ptmp]) \n\t"
2644  "sdc1 %[ftmp4], 0x58(%[ptmp]) \n\t"
2645  "ldc1 %[ftmp8], 0x28(%[ptmp]) \n\t"
2646  "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
2647  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2648  "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
2649  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
2650  "sdc1 %[ftmp3], 0x28(%[ptmp]) \n\t"
2651  "sdc1 %[ftmp0], 0x38(%[ptmp]) \n\t"
2652  "sdc1 %[ftmp6], 0x68(%[ptmp]) \n\t"
2653  "sdc1 %[ftmp5], 0x78(%[ptmp]) \n\t"
2654  PTR_S "%[addr1], 0x00(%[pdat]) \n\t"
2655  PTR_S "%[addr2], 0x08(%[pdat]) \n\t"
2656  PTR_S "%[addr0], 0x10(%[pdat]) \n\t"
2657  PTR_S "%[addr3], 0x18(%[pdat]) \n\t"
2658  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2659  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2660  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2661  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2662  [ftmp8]"=&f"(ftmp[8]),
2663  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2664  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2665  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2666  [addr6]"=&r"(addr[6])
2667  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
2668  [ptmp]"r"(ptmp), [pdat]"r"(pdat)
2669  : "memory"
2670  );
2671 
2672  ff_deblock_v_luma_intra_8_mmi((uint8_t *) &ptmp[8], 0x10, alpha, beta);
2673 
2674  __asm__ volatile (
2675  PTR_L "%[addr1], 0x00(%[pdat]) \n\t"
2676  PTR_L "%[addr2], 0x08(%[pdat]) \n\t"
2677  PTR_L "%[addr0], 0x10(%[pdat]) \n\t"
2678  PTR_L "%[addr3], 0x18(%[pdat]) \n\t"
2679  PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t"
2680  "ldc1 %[ftmp0], 0x08(%[ptmp]) \n\t"
2681  "ldc1 %[ftmp1], 0x18(%[ptmp]) \n\t"
2682  "ldc1 %[ftmp2], 0x28(%[ptmp]) \n\t"
2683  "ldc1 %[ftmp3], 0x38(%[ptmp]) \n\t"
2684  "ldc1 %[ftmp4], 0x48(%[ptmp]) \n\t"
2685  "ldc1 %[ftmp5], 0x58(%[ptmp]) \n\t"
2686  "ldc1 %[ftmp6], 0x68(%[ptmp]) \n\t"
2687  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2688  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2689  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2690  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2691  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2692  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2693  "ldc1 %[ftmp8], 0x78(%[ptmp]) \n\t"
2694  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2695  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
2696  "gssdlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
2697  "gssdrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
2698  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2699  "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
2700  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2701  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2702  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2703  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
2704  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
2705  "gssdlc1 %[ftmp2], 0x07(%[addr5]) \n\t"
2706  "gssdrc1 %[ftmp2], 0x00(%[addr5]) \n\t"
2707  "gsldlc1 %[ftmp2], 0x07(%[addr1]) \n\t"
2708  "gsldrc1 %[ftmp2], 0x00(%[addr1]) \n\t"
2709  "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
2710  "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2711  "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
2712  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2713  "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
2714  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
2715  PTR_ADDU "%[addr5], %[addr1], %[stride] \n\t"
2716  "gssdlc1 %[ftmp0], 0x07(%[addr1]) \n\t"
2717  "gssdrc1 %[ftmp0], 0x00(%[addr1]) \n\t"
2718  PTR_ADDU "%[addr6], %[addr4], %[stride] \n\t"
2719  "gssdlc1 %[ftmp5], 0x07(%[addr5]) \n\t"
2720  "gssdrc1 %[ftmp5], 0x00(%[addr5]) \n\t"
2721  PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t"
2722  "gssdlc1 %[ftmp7], 0x07(%[addr6]) \n\t"
2723  "gssdrc1 %[ftmp7], 0x00(%[addr6]) \n\t"
2724  PTR_ADDU "%[addr6], %[addr1], %[addr0] \n\t"
2725  "gssdlc1 %[ftmp4], 0x07(%[addr5]) \n\t"
2726  "gssdrc1 %[ftmp4], 0x00(%[addr5]) \n\t"
2727  "gsldlc1 %[ftmp8], 0x07(%[addr6]) \n\t"
2728  "gsldrc1 %[ftmp8], 0x00(%[addr6]) \n\t"
2729  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2730  "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
2731  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2732  "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
2733  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
2734  "gssdlc1 %[ftmp3], 0x07(%[addr5]) \n\t"
2735  "gssdrc1 %[ftmp3], 0x00(%[addr5]) \n\t"
2736  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2737  "gssdlc1 %[ftmp0], 0x07(%[addr4]) \n\t"
2738  "gssdrc1 %[ftmp0], 0x00(%[addr4]) \n\t"
2739  PTR_ADDU "%[addr6], %[addr4], %[addr3] \n\t"
2740  "gssdlc1 %[ftmp6], 0x07(%[addr5]) \n\t"
2741  "gssdrc1 %[ftmp6], 0x00(%[addr5]) \n\t"
2742  PTR_ADDU "%[addr5], %[addr3], %[addr3] \n\t"
2743  "gssdlc1 %[ftmp5], 0x07(%[addr6]) \n\t"
2744  "gssdrc1 %[ftmp5], 0x00(%[addr6]) \n\t"
2745  PTR_SUBU "%[addr1], %[addr1], %[addr5] \n\t"
2746  PTR_SUBU "%[addr4], %[addr4], %[addr5] \n\t"
2747  "ldc1 %[ftmp0], 0x00(%[ptmp]) \n\t"
2748  "ldc1 %[ftmp1], 0x10(%[ptmp]) \n\t"
2749  "ldc1 %[ftmp2], 0x20(%[ptmp]) \n\t"
2750  "ldc1 %[ftmp3], 0x30(%[ptmp]) \n\t"
2751  "ldc1 %[ftmp4], 0x40(%[ptmp]) \n\t"
2752  "ldc1 %[ftmp5], 0x50(%[ptmp]) \n\t"
2753  "ldc1 %[ftmp6], 0x60(%[ptmp]) \n\t"
2754  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2755  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2756  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2757  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2758  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2759  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2760  "ldc1 %[ftmp8], 0x70(%[ptmp]) \n\t"
2761  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2762  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
2763  "gssdlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
2764  "gssdrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
2765  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2766  "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
2767  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2768  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2769  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2770  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
2771  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
2772  "gssdlc1 %[ftmp2], 0x07(%[addr5]) \n\t"
2773  "gssdrc1 %[ftmp2], 0x00(%[addr5]) \n\t"
2774  "gsldlc1 %[ftmp2], 0x07(%[addr1]) \n\t"
2775  "gsldrc1 %[ftmp2], 0x00(%[addr1]) \n\t"
2776  "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
2777  "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2778  "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
2779  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2780  "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
2781  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
2782  PTR_ADDU "%[addr5], %[addr1], %[stride] \n\t"
2783  "gssdlc1 %[ftmp0], 0x07(%[addr1]) \n\t"
2784  "gssdrc1 %[ftmp0], 0x00(%[addr1]) \n\t"
2785  PTR_ADDU "%[addr6], %[addr4], %[stride] \n\t"
2786  "gssdlc1 %[ftmp5], 0x07(%[addr5]) \n\t"
2787  "gssdrc1 %[ftmp5], 0x00(%[addr5]) \n\t"
2788  PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t"
2789  "gssdlc1 %[ftmp7], 0x07(%[addr6]) \n\t"
2790  "gssdrc1 %[ftmp7], 0x00(%[addr6]) \n\t"
2791  PTR_ADDU "%[addr6], %[addr1], %[addr0] \n\t"
2792  "gssdlc1 %[ftmp4], 0x07(%[addr5]) \n\t"
2793  "gssdrc1 %[ftmp4], 0x00(%[addr5]) \n\t"
2794  "gsldlc1 %[ftmp8], 0x07(%[addr6]) \n\t"
2795  "gsldrc1 %[ftmp8], 0x00(%[addr6]) \n\t"
2796  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2797  "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
2798  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2799  "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
2800  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
2801  "gssdlc1 %[ftmp3], 0x07(%[addr5]) \n\t"
2802  "gssdrc1 %[ftmp3], 0x00(%[addr5]) \n\t"
2803  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2804  "gssdlc1 %[ftmp0], 0x07(%[addr4]) \n\t"
2805  "gssdrc1 %[ftmp0], 0x00(%[addr4]) \n\t"
2806  PTR_ADDU "%[addr6], %[addr4], %[addr3] \n\t"
2807  "gssdlc1 %[ftmp6], 0x07(%[addr5]) \n\t"
2808  "gssdrc1 %[ftmp6], 0x00(%[addr5]) \n\t"
2809  "gssdlc1 %[ftmp5], 0x07(%[addr6]) \n\t"
2810  "gssdrc1 %[ftmp5], 0x00(%[addr6]) \n\t"
2811  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2812  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2813  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2814  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2815  [ftmp8]"=&f"(ftmp[8]),
2816  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2817  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2818  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2819  [addr6]"=&r"(addr[6])
2820  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
2821  [ptmp]"r"(ptmp), [pdat]"r"(pdat)
2822  : "memory"
2823  );
2824 }
void ff_h264_weight_pixels8_8_mmi(uint8_t *block, int stride, int height, int log2_denom, int weight, int offset)
Definition: h264dsp_mmi.c:1276
#define mips_reg
Definition: asmdefs.h:44
void ff_h264_idct_add16_8_mmi(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp_mmi.c:795
MIPS assembly defines from sys/asm.h but rewritten for use with C inline assembly (rather than from w...
void ff_h264_chroma_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
Definition: h264dsp_mmi.c:1141
else temp
Definition: vf_mcdeint.c:259
void ff_deblock_v_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_mmi.c:2259
const char * b
Definition: vf_curves.c:109
#define PTR_SLL
Definition: asmdefs.h:55
#define DECLARE_ALIGNED(n, t, v)
Definition: mem.h:53
void ff_deblock_h_chroma_intra_8_mmi(uint8_t *pix, int stride, int alpha, int beta)
Definition: h264dsp_mmi.c:2110
void ff_h264_idct8_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp_mmi.c:713
void ff_h264_idct_add8_8_mmi(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp_mmi.c:842
static int16_t block[64]
Definition: dct.c:113
uint8_t
void ff_h264_biweight_pixels16_8_mmi(uint8_t *dst, uint8_t *src, int stride, int height, int log2_denom, int weightd, int weights, int offset)
Definition: h264dsp_mmi.c:1208
void ff_h264_luma_dc_dequant_idct_8_mmi(int16_t *output, int16_t *input, int qmul)
Definition: h264dsp_mmi.c:886
void ff_deblock_v_luma_intra_8_mmi(uint8_t *pix, int stride, int alpha, int beta)
Definition: h264dsp_mmi.c:2268
void ff_h264_weight_pixels4_8_mmi(uint8_t *block, int stride, int height, int log2_denom, int weight, int offset)
Definition: h264dsp_mmi.c:1365
#define PTR_ADDI
Definition: asmdefs.h:49
void ff_deblock_h_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_mmi.c:2275
#define height
const uint64_t ff_pb_A1
Definition: constants.c:55
const uint64_t ff_pw_32
Definition: constants.c:38
void ff_h264_biweight_pixels4_8_mmi(uint8_t *dst, uint8_t *src, int stride, int height, int log2_denom, int weightd, int weights, int offset)
Definition: h264dsp_mmi.c:1405
void ff_h264_idct_add8_422_8_mmi(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp_mmi.c:858
static double alpha(void *priv, double x, double y)
Definition: vf_geq.c:99
void ff_h264_idct8_add4_8_mmi(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp_mmi.c:825
const uint64_t ff_pb_3
Definition: constants.c:53
void ff_h264_idct8_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp_mmi.c:179
void ff_deblock_h_luma_intra_8_mmi(uint8_t *pix, int stride, int alpha, int beta)
Definition: h264dsp_mmi.c:2521
void ff_h264_idct_add16intra_8_mmi(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp_mmi.c:812
void ff_h264_chroma422_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
Definition: h264dsp_mmi.c:1108
void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp_mmi.c:84
static const uint8_t offset[127][2]
Definition: vf_spp.c:92
const uint64_t ff_pw_1
Definition: constants.c:26
static const uint8_t scan8[16 *3+3]
Definition: h264.h:801
#define PTR_SUBU
Definition: asmdefs.h:50
void ff_deblock_v_chroma_8_mmi(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_mmi.c:1798
#define src
Definition: vp9dsp.c:530
static void deblock_v8_luma_intra_8_mmi(uint8_t *pix, int stride, int alpha, int beta)
Definition: h264dsp_mmi.c:1576
void ff_deblock_v_chroma_intra_8_mmi(uint8_t *pix, int stride, int alpha, int beta)
Definition: h264dsp_mmi.c:1879
void ff_deblock_v8_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_mmi.c:1451
#define src1
Definition: h264pred.c:139
void ff_h264_idct_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp_mmi.c:662
void ff_h264_biweight_pixels8_8_mmi(uint8_t *dst, uint8_t *src, int stride, int height, int log2_denom, int weightd, int weights, int offset)
Definition: h264dsp_mmi.c:1316
#define PTR_ADDIU
Definition: asmdefs.h:48
#define src0
Definition: h264pred.c:138
#define PTR_L
Definition: asmdefs.h:51
static int weight(int i, int blen, int offset)
Definition: diracdec.c:1429
void ff_deblock_h_chroma_8_mmi(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_mmi.c:1953
const uint64_t ff_pb_1
Definition: constants.c:52
GLint GLenum GLboolean GLsizei stride
Definition: opengl_enc.c:105
static double c[64]
void ff_h264_add_pixels4_8_mmi(uint8_t *dst, int16_t *src, int stride)
Definition: h264dsp_mmi.c:30
void ff_h264_weight_pixels16_8_mmi(uint8_t *block, int stride, int height, int log2_denom, int weight, int offset)
Definition: h264dsp_mmi.c:1155
static uint8_t tmp[8]
Definition: des.c:38
#define PTR_S
Definition: asmdefs.h:52
static int16_t block1[64]
Definition: dct.c:114
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(constint16_t *) pi >>8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(constint32_t *) pi >>24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(constfloat *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(constfloat *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(constfloat *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(constdouble *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(constdouble *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(constdouble *) pi *(1U<< 31))))#defineSET_CONV_FUNC_GROUP(ofmt, ifmt) staticvoidset_generic_function(AudioConvert *ac){}voidff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enumAVSampleFormatout_fmt, enumAVSampleFormatin_fmt, intchannels, intsample_rate, 
intapply_map){AudioConvert *ac;intin_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) returnNULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt)>2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);returnNULL;}returnac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}elseif(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;elseac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);returnac;}intff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){intuse_generic=1;intlen=in->nb_samples;intp;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%dsamples-audio_convert:%sto%s(dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));returnff_convert_dither(ac-> dc
#define PTR_ADDU
Definition: asmdefs.h:47
#define stride
#define PTR_SRL
Definition: asmdefs.h:54