FFmpeg
h264dsp_mmi.c
Go to the documentation of this file.
1 /*
2  * Loongson SIMD optimized h264dsp
3  *
4  * Copyright (c) 2015 Loongson Technology Corporation Limited
5  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  * Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
7  * Heiher <r@hev.cc>
8  *
9  * This file is part of FFmpeg.
10  *
11  * FFmpeg is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * FFmpeg is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with FFmpeg; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25 
27 #include "h264dsp_mips.h"
29 #include "libavutil/mem_internal.h"
30 
/*
 * Add a 4x4 block of 16-bit residuals to a 4x4 block of 8-bit pixels.
 *
 * dst:    top-left pixel of the destination block. Four rows are read and
 *         written, 4 bytes each, at dst, dst+stride, dst+2*stride and
 *         dst+3*stride.
 * src:    32 bytes of int16_t residuals, consumed as four 8-byte rows.
 *         The whole 32-byte buffer is zeroed once it has been loaded.
 * stride: distance between consecutive destination rows, in bytes.
 *
 * NOTE(review): the per-instruction remarks below follow the usual reading
 * of the Loongson MMI mnemonics -- confirm against the MMI reference.
 */
31 void ff_h264_add_pixels4_8_mmi(uint8_t *dst, int16_t *src, int stride)
32 {
33  double ftmp[9];
35 
36  __asm__ volatile (
    /* ftmp0 = 0: source for clearing src and zero half for (un)packing */
37  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
    /* fetch the four 8-byte residual rows */
38  MMI_LDC1(%[ftmp1], %[src], 0x00)
39  MMI_LDC1(%[ftmp2], %[src], 0x08)
40  MMI_LDC1(%[ftmp3], %[src], 0x10)
41  MMI_LDC1(%[ftmp4], %[src], 0x18)
42  /* memset(src, 0, 32); */
43  MMI_SQC1(%[ftmp0], %[ftmp0], %[src], 0x00)
44  MMI_SQC1(%[ftmp0], %[ftmp0], %[src], 0x10)
    /* fetch 4 pixels from each destination row */
45  MMI_ULWC1(%[ftmp5], %[dst0], 0x00)
46  MMI_ULWC1(%[ftmp6], %[dst1], 0x00)
47  MMI_ULWC1(%[ftmp7], %[dst2], 0x00)
48  MMI_ULWC1(%[ftmp8], %[dst3], 0x00)
    /* interleave pixel bytes with zero bytes, i.e. widen them to 16 bits */
49  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
50  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
51  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
52  "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
    /* residual + pixel, per 16-bit lane */
53  "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
54  "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
55  "paddh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
56  "paddh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
    /* narrow the sums back to bytes (presumably with unsigned saturation) */
57  "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
58  "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
59  "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
60  "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
    /* write 4 pixels back to each destination row */
61  MMI_SWC1(%[ftmp1], %[dst0], 0x00)
62  MMI_SWC1(%[ftmp2], %[dst1], 0x00)
63  MMI_SWC1(%[ftmp3], %[dst2], 0x00)
64  MMI_SWC1(%[ftmp4], %[dst3], 0x00)
    /* outputs: scratch FP registers only, all early-clobber */
65  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
66  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
67  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
68  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
70  [ftmp8]"=&f"(ftmp[8])
    /* inputs: one pointer per destination row, plus the residual buffer */
71  : [dst0]"r"(dst), [dst1]"r"(dst+stride),
72  [dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride),
73  [src]"r"(src)
    /* src and dst are written through pointers, hence the memory clobber */
74  : "memory"
75  );
76 
77 }
78 
79 void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
80 {
81  double ftmp[12];
82  uint64_t tmp[1];
85 
86  __asm__ volatile (
87  MMI_LDC1(%[ftmp0], %[block], 0x00)
88  MMI_LDC1(%[ftmp1], %[block], 0x08)
89  MMI_LDC1(%[ftmp2], %[block], 0x10)
90  MMI_LDC1(%[ftmp3], %[block], 0x18)
91  /* memset(block, 0, 32) */
92  "pxor %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
93  MMI_SQC1(%[ftmp4], %[ftmp4], %[block], 0x00)
94  MMI_SQC1(%[ftmp4], %[ftmp4], %[block], 0x10)
95  "dli %[tmp0], 0x01 \n\t"
96  "mtc1 %[tmp0], %[ftmp8] \n\t"
97  "dli %[tmp0], 0x06 \n\t"
98  "mtc1 %[tmp0], %[ftmp9] \n\t"
99  "psrah %[ftmp4], %[ftmp1], %[ftmp8] \n\t"
100  "psrah %[ftmp5], %[ftmp3], %[ftmp8] \n\t"
101  "psubh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
102  "paddh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
103  "paddh %[ftmp10], %[ftmp2], %[ftmp0] \n\t"
104  "psubh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
105  "paddh %[ftmp11], %[ftmp5], %[ftmp10] \n\t"
106  "psubh %[ftmp2], %[ftmp10], %[ftmp5] \n\t"
107  "paddh %[ftmp10], %[ftmp4], %[ftmp0] \n\t"
108  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
109  "punpckhhw %[ftmp1], %[ftmp11], %[ftmp10] \n\t"
110  "punpcklhw %[ftmp5], %[ftmp11], %[ftmp10] \n\t"
111  "punpckhhw %[ftmp4], %[ftmp0], %[ftmp2] \n\t"
112  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
113  "punpckhwd %[ftmp2], %[ftmp5], %[ftmp0] \n\t"
114  "punpcklwd %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
115  "punpcklwd %[ftmp10], %[ftmp1], %[ftmp4] \n\t"
116  "punpckhwd %[ftmp0], %[ftmp1], %[ftmp4] \n\t"
117  "paddh %[ftmp5], %[ftmp5], %[ff_pw_32] \n\t"
118  "psrah %[ftmp4], %[ftmp2], %[ftmp8] \n\t"
119  "psrah %[ftmp3], %[ftmp0], %[ftmp8] \n\t"
120  "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
121  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
122  "paddh %[ftmp1], %[ftmp10], %[ftmp5] \n\t"
123  "psubh %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
124  "paddh %[ftmp10], %[ftmp3], %[ftmp1] \n\t"
125  "psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
126  "paddh %[ftmp11], %[ftmp4], %[ftmp5] \n\t"
127  "psubh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
128  MMI_ULWC1(%[ftmp2], %[dst], 0x00)
129  MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
130  "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
131  "psrah %[ftmp3], %[ftmp10], %[ftmp9] \n\t"
132  "psrah %[ftmp4], %[ftmp11], %[ftmp9] \n\t"
133  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
134  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
135  "paddh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
136  "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
137  "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
138  "packushb %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
139  MMI_SWC1(%[ftmp2], %[dst], 0x00)
140  MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
141  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
142  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
143  MMI_ULWC1(%[ftmp2], %[dst], 0x00)
144  "psrah %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
145  MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
146  "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
147  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
148  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
149  "paddh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
150  "paddh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
151  "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
152  MMI_SWC1(%[ftmp2], %[dst], 0x00)
153  "packushb %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
154  MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
155  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
156  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
157  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
158  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
159  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
160  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
163  [tmp0]"=&r"(tmp[0])
164  : [dst]"r"(dst), [block]"r"(block),
165  [stride]"r"((mips_reg)stride), [ff_pw_32]"f"(ff_pw_32.f)
166  : "memory"
167  );
168 
169 }
170 
171 void ff_h264_idct8_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
172 {
173  double ftmp[16];
174  uint64_t tmp[7];
175  mips_reg addr[1];
178 
179  __asm__ volatile (
180  "lhu %[tmp0], 0x00(%[block]) \n\t"
181  PTR_ADDI "$sp, $sp, -0x20 \n\t"
182  PTR_ADDIU "%[tmp0], %[tmp0], 0x20 \n\t"
183  MMI_LDC1(%[ftmp1], %[block], 0x10)
184  "sh %[tmp0], 0x00(%[block]) \n\t"
185  MMI_LDC1(%[ftmp2], %[block], 0x20)
186  "dli %[tmp0], 0x01 \n\t"
187  MMI_LDC1(%[ftmp3], %[block], 0x30)
188  "mtc1 %[tmp0], %[ftmp8] \n\t"
189  MMI_LDC1(%[ftmp5], %[block], 0x50)
190  MMI_LDC1(%[ftmp6], %[block], 0x60)
191  MMI_LDC1(%[ftmp7], %[block], 0x70)
192  "mov.d %[ftmp0], %[ftmp1] \n\t"
193  "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
194  "psrah %[ftmp4], %[ftmp5], %[ftmp8] \n\t"
195  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
196  "paddh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
197  "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
198  "paddh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
199  "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
200  "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
201  "psubh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
202  "psubh %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
203  "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
204  "paddh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
205  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
206  "psrah %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
207  "psubh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
208  "dli %[tmp0], 0x02 \n\t"
209  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
210  "mtc1 %[tmp0], %[ftmp9] \n\t"
211  "mov.d %[ftmp7], %[ftmp1] \n\t"
212  "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
213  "psrah %[ftmp3], %[ftmp4], %[ftmp9] \n\t"
214  "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
215  "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
216  "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
217  "psrah %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
218  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
219  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
220  "mov.d %[ftmp5], %[ftmp6] \n\t"
221  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
222  "psrah %[ftmp4], %[ftmp2], %[ftmp8] \n\t"
223  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
224  "psubh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
225  MMI_LDC1(%[ftmp2], %[block], 0x00)
226  MMI_LDC1(%[ftmp5], %[block], 0x40)
227  "paddh %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
228  "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
229  "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
230  "psubh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
231  "paddh %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
232  "paddh %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
233  "psubh %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
234  "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
235  "paddh %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
236  "psubh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
237  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
238  "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
239  "psubh %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
240  "paddh %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
241  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
242  "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
243  "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
244  "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
245  "psubh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
246  "paddh %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
247  MMI_SDC1(%[ftmp6], %[block], 0x00)
248  "psubh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
249  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp0] \n\t"
250  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
251  "punpckhhw %[ftmp0], %[ftmp3], %[ftmp1] \n\t"
252  "punpcklhw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
253  "punpckhwd %[ftmp1], %[ftmp7], %[ftmp3] \n\t"
254  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
255  "punpckhwd %[ftmp3], %[ftmp6], %[ftmp0] \n\t"
256  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
257  MMI_LDC1(%[ftmp0], %[block], 0x00)
258  MMI_SDC1(%[ftmp7], $sp, 0x00)
259  MMI_SDC1(%[ftmp1], $sp, 0x10)
260  "dmfc1 %[tmp1], %[ftmp6] \n\t"
261  "dmfc1 %[tmp3], %[ftmp3] \n\t"
262  "punpckhhw %[ftmp3], %[ftmp5], %[ftmp2] \n\t"
263  "punpcklhw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
264  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp0] \n\t"
265  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
266  "punpckhwd %[ftmp0], %[ftmp5], %[ftmp4] \n\t"
267  "punpcklwd %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
268  "punpckhwd %[ftmp4], %[ftmp3], %[ftmp2] \n\t"
269  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
270  MMI_SDC1(%[ftmp5], $sp, 0x08)
271  MMI_SDC1(%[ftmp0], $sp, 0x18)
272  "dmfc1 %[tmp2], %[ftmp3] \n\t"
273  "dmfc1 %[tmp4], %[ftmp4] \n\t"
274  MMI_LDC1(%[ftmp1], %[block], 0x18)
275  MMI_LDC1(%[ftmp6], %[block], 0x28)
276  MMI_LDC1(%[ftmp2], %[block], 0x38)
277  MMI_LDC1(%[ftmp0], %[block], 0x58)
278  MMI_LDC1(%[ftmp3], %[block], 0x68)
279  MMI_LDC1(%[ftmp4], %[block], 0x78)
280  "mov.d %[ftmp7], %[ftmp1] \n\t"
281  "psrah %[ftmp5], %[ftmp0], %[ftmp8] \n\t"
282  "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
283  "paddh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
284  "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
285  "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
286  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
287  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
288  "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
289  "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
290  "psubh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
291  "psrah %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
292  "paddh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
293  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
294  "psrah %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
295  "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
296  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
297  "mov.d %[ftmp4], %[ftmp1] \n\t"
298  "psrah %[ftmp2], %[ftmp5], %[ftmp9] \n\t"
299  "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
300  "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
301  "psrah %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
302  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
303  "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
304  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
305  "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
306  "mov.d %[ftmp0], %[ftmp3] \n\t"
307  "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
308  "psrah %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
309  "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
310  "psubh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
311  MMI_LDC1(%[ftmp6], %[block], 0x08)
312  MMI_LDC1(%[ftmp0], %[block], 0x48)
313  "paddh %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
314  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
315  "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
316  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
317  "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
318  "paddh %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
319  "psubh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
320  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
321  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
322  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
323  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
324  "paddh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
325  "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
326  "paddh %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
327  "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
328  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
329  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
330  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
331  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
332  "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
333  MMI_SDC1(%[ftmp3], %[block], 0x08)
334  "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
335  "punpckhhw %[ftmp3], %[ftmp4], %[ftmp7] \n\t"
336  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
337  "punpckhhw %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
338  "punpcklhw %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
339  "punpckhwd %[ftmp1], %[ftmp4], %[ftmp2] \n\t"
340  "punpcklwd %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
341  "punpckhwd %[ftmp2], %[ftmp3], %[ftmp7] \n\t"
342  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
343  MMI_LDC1(%[ftmp7], %[block], 0x08)
344  "dmfc1 %[tmp5], %[ftmp4] \n\t"
345  "mov.d %[ftmp10], %[ftmp1] \n\t"
346  "mov.d %[ftmp12], %[ftmp3] \n\t"
347  "mov.d %[ftmp14], %[ftmp2] \n\t"
348  "punpckhhw %[ftmp2], %[ftmp0], %[ftmp6] \n\t"
349  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
350  "punpckhhw %[ftmp6], %[ftmp5], %[ftmp7] \n\t"
351  "punpcklhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
352  "punpckhwd %[ftmp7], %[ftmp0], %[ftmp5] \n\t"
353  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
354  "punpckhwd %[ftmp5], %[ftmp2], %[ftmp6] \n\t"
355  "punpcklwd %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
356  "dmfc1 %[tmp6], %[ftmp0] \n\t"
357  "mov.d %[ftmp11], %[ftmp7] \n\t"
358  "mov.d %[ftmp13], %[ftmp2] \n\t"
359  "mov.d %[ftmp15], %[ftmp5] \n\t"
360  PTR_ADDIU "%[addr0], %[dst], 0x04 \n\t"
361  "mov.d %[ftmp7], %[ftmp10] \n\t"
362  "dmtc1 %[tmp3], %[ftmp6] \n\t"
363  MMI_LDC1(%[ftmp1], $sp, 0x10)
364  "dmtc1 %[tmp1], %[ftmp3] \n\t"
365  "mov.d %[ftmp4], %[ftmp1] \n\t"
366  "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
367  "psrah %[ftmp0], %[ftmp7], %[ftmp8] \n\t"
368  "paddh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
369  "paddh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
370  "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
371  "paddh %[ftmp0], %[ftmp0], %[ftmp14] \n\t"
372  "paddh %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
373  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
374  "psubh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
375  "psubh %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
376  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
377  "paddh %[ftmp4], %[ftmp4], %[ftmp14] \n\t"
378  "psubh %[ftmp7], %[ftmp7], %[ftmp14] \n\t"
379  "psrah %[ftmp5], %[ftmp14], %[ftmp8] \n\t"
380  "psubh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
381  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
382  "mov.d %[ftmp5], %[ftmp1] \n\t"
383  "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
384  "psrah %[ftmp6], %[ftmp0], %[ftmp9] \n\t"
385  "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
386  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
387  "psrah %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
388  "psrah %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
389  "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
390  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
391  "mov.d %[ftmp7], %[ftmp12] \n\t"
392  "psrah %[ftmp2], %[ftmp12], %[ftmp8] \n\t"
393  "psrah %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
394  "paddh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
395  "psubh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
396  MMI_LDC1(%[ftmp3], $sp, 0x00)
397  "dmtc1 %[tmp5], %[ftmp7] \n\t"
398  "paddh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
399  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
400  "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
401  "psubh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
402  "paddh %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
403  "paddh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
404  "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
405  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
406  "paddh %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
407  "psubh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
408  "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
409  "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
410  "psubh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
411  "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
412  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
413  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
414  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
415  "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
416  "psubh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
417  "paddh %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
418  MMI_SDC1(%[ftmp3], $sp, 0x00)
419  "psubh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
420  MMI_SDC1(%[ftmp0], $sp, 0x10)
421  "dmfc1 %[tmp1], %[ftmp2] \n\t"
422  "pxor %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
423  MMI_SDC1(%[ftmp2], %[block], 0x00)
424  MMI_SDC1(%[ftmp2], %[block], 0x08)
425  MMI_SDC1(%[ftmp2], %[block], 0x10)
426  MMI_SDC1(%[ftmp2], %[block], 0x18)
427  MMI_SDC1(%[ftmp2], %[block], 0x20)
428  MMI_SDC1(%[ftmp2], %[block], 0x28)
429  MMI_SDC1(%[ftmp2], %[block], 0x30)
430  MMI_SDC1(%[ftmp2], %[block], 0x38)
431  MMI_SDC1(%[ftmp2], %[block], 0x40)
432  MMI_SDC1(%[ftmp2], %[block], 0x48)
433  MMI_SDC1(%[ftmp2], %[block], 0x50)
434  MMI_SDC1(%[ftmp2], %[block], 0x58)
435  MMI_SDC1(%[ftmp2], %[block], 0x60)
436  MMI_SDC1(%[ftmp2], %[block], 0x68)
437  MMI_SDC1(%[ftmp2], %[block], 0x70)
438  MMI_SDC1(%[ftmp2], %[block], 0x78)
439  "dli %[tmp3], 0x06 \n\t"
440  "mtc1 %[tmp3], %[ftmp10] \n\t"
441  MMI_ULWC1(%[ftmp3], %[dst], 0x00)
442  MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
443  "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
444  "psrah %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
445  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
446  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
447  "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
448  "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
449  "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
450  "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
451  MMI_SWC1(%[ftmp3], %[dst], 0x00)
452  MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
453  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
454  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
455  MMI_ULWC1(%[ftmp3], %[dst], 0x00)
456  MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
457  "psrah %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
458  "psrah %[ftmp1], %[ftmp1], %[ftmp10] \n\t"
459  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
460  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
461  "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
462  "paddh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
463  "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
464  "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
465  MMI_SWC1(%[ftmp3], %[dst], 0x00)
466  MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
467  MMI_LDC1(%[ftmp5], $sp, 0x00)
468  MMI_LDC1(%[ftmp4], $sp, 0x10)
469  "dmtc1 %[tmp1], %[ftmp6] \n\t"
470  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
471  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
472  MMI_ULWC1(%[ftmp3], %[dst], 0x00)
473  MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
474  "psrah %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
475  "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
476  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
477  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
478  "paddh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
479  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
480  "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
481  "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
482  MMI_SWC1(%[ftmp3], %[dst], 0x00)
483  MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
484  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
485  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
486  MMI_ULWC1(%[ftmp3], %[dst], 0x00)
487  MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
488  "psrah %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
489  "psrah %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
490  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
491  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
492  "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
493  "paddh %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
494  "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
495  "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
496  MMI_SWC1(%[ftmp3], %[dst], 0x00)
497  MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
498  "dmtc1 %[tmp4], %[ftmp1] \n\t"
499  "dmtc1 %[tmp2], %[ftmp6] \n\t"
500  MMI_LDC1(%[ftmp4], $sp, 0x18)
501  "mov.d %[ftmp5], %[ftmp4] \n\t"
502  "psrah %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
503  "psrah %[ftmp7], %[ftmp11], %[ftmp8] \n\t"
504  "paddh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
505  "paddh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
506  "paddh %[ftmp7], %[ftmp7], %[ftmp15] \n\t"
507  "paddh %[ftmp4], %[ftmp4], %[ftmp11] \n\t"
508  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
509  "paddh %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
510  "psubh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
511  "psubh %[ftmp3], %[ftmp11], %[ftmp1] \n\t"
512  "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
513  "paddh %[ftmp5], %[ftmp5], %[ftmp15] \n\t"
514  "psubh %[ftmp3], %[ftmp3], %[ftmp15] \n\t"
515  "psrah %[ftmp2], %[ftmp15], %[ftmp8] \n\t"
516  "psubh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
517  "psubh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
518  "mov.d %[ftmp2], %[ftmp4] \n\t"
519  "psrah %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
520  "psrah %[ftmp1], %[ftmp7], %[ftmp9] \n\t"
521  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
522  "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
523  "psrah %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
524  "psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
525  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
526  "psubh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
527  "mov.d %[ftmp3], %[ftmp13] \n\t"
528  "psrah %[ftmp0], %[ftmp13], %[ftmp8] \n\t"
529  "psrah %[ftmp7], %[ftmp6], %[ftmp8] \n\t"
530  "paddh %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
531  "psubh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
532  MMI_LDC1(%[ftmp6], $sp, 0x08)
533  "dmtc1 %[tmp6], %[ftmp3] \n\t"
534  "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
535  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
536  "paddh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
537  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
538  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
539  "paddh %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
540  "psubh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
541  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
542  "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
543  "psubh %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
544  "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
545  "paddh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
546  "psubh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
547  "paddh %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
548  "paddh %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
549  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
550  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
551  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
552  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
553  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
554  MMI_SDC1(%[ftmp6], $sp, 0x08)
555  "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
556  MMI_SDC1(%[ftmp7], $sp, 0x18)
557  "dmfc1 %[tmp2], %[ftmp0] \n\t"
558  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
559  MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
560  MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
561  "psrah %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
562  "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
563  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
564  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
565  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
566  "paddh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
567  "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
568  "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
569  MMI_SWC1(%[ftmp6], %[addr0], 0x00)
570  MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
571  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
572  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
573  MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
574  MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
575  "psrah %[ftmp1], %[ftmp1], %[ftmp10] \n\t"
576  "psrah %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
577  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
578  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
579  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
580  "paddh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
581  "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
582  "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
583  MMI_SWC1(%[ftmp6], %[addr0], 0x00)
584  MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
585  MMI_LDC1(%[ftmp2], $sp, 0x08)
586  MMI_LDC1(%[ftmp5], $sp, 0x18)
587  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
588  "dmtc1 %[tmp2], %[ftmp1] \n\t"
589  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
590  MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
591  MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
592  "psrah %[ftmp3], %[ftmp3], %[ftmp10] \n\t"
593  "psrah %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
594  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
595  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
596  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
597  "paddh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
598  "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
599  "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
600  MMI_SWC1(%[ftmp6], %[addr0], 0x00)
601  MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
602  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
603  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
604  MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
605  MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
606  "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
607  "psrah %[ftmp1], %[ftmp1], %[ftmp10] \n\t"
608  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
609  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
610  "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
611  "paddh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
612  "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
613  "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
614  MMI_SWC1(%[ftmp6], %[addr0], 0x00)
615  MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
616  PTR_ADDIU "$sp, $sp, 0x20 \n\t"
617  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
618  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
619  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
620  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
621  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
622  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
623  [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
624  [ftmp14]"=&f"(ftmp[14]), [ftmp15]"=&f"(ftmp[15]),
625  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
626  [tmp2]"=&r"(tmp[2]), [tmp3]"=&r"(tmp[3]),
627  [tmp4]"=&r"(tmp[4]), [tmp5]"=&r"(tmp[5]),
628  [tmp6]"=&r"(tmp[6]),
631  [addr0]"=&r"(addr[0])
632  : [dst]"r"(dst), [block]"r"(block),
633  [stride]"r"((mips_reg)stride)
634  : "memory"
635  );
636 
637 }
638 
/*
 * DC-only variant of the 4x4 inverse transform: add one rounded value to
 * every pixel of a 4x4 block.
 *
 * dst:    top-left pixel of the destination. Four rows of 4 bytes are read
 *         and rewritten at dst, dst+stride, dst+2*stride and dst+3*stride.
 * block:  only block[0] is read; it is reset to 0 before the asm runs.
 * stride: distance between destination rows, in bytes.
 *
 * NOTE(review): the per-instruction remarks below follow the usual reading
 * of the Loongson MMI mnemonics -- confirm against the MMI reference.
 */
639 void ff_h264_idct_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
640 {
    /* rounded DC term: (coefficient + 32) / 64 via an arithmetic shift */
641  int dc = (block[0] + 32) >> 6;
642  double ftmp[6];
644 
    /* the coefficient has been consumed; leave the block cleared */
645  block[0] = 0;
646 
647  __asm__ volatile (
    /* ftmp0 = 0; ftmp5 = dc shuffled with an all-zero selector, presumably
     * replicating the low 16-bit lane into all four lanes */
648  "mtc1 %[dc], %[ftmp5] \n\t"
649  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
650  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
    /* fetch 4 pixels from each destination row */
651  MMI_ULWC1(%[ftmp1], %[dst0], 0x00)
652  MMI_ULWC1(%[ftmp2], %[dst1], 0x00)
653  MMI_ULWC1(%[ftmp3], %[dst2], 0x00)
654  MMI_ULWC1(%[ftmp4], %[dst3], 0x00)
    /* interleave pixel bytes with zero bytes, i.e. widen them to 16 bits */
655  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
656  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
657  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
658  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
    /* pixel + dc per 16-bit lane (signed-saturating add, per the mnemonic) */
659  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
660  "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
661  "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
662  "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
    /* narrow the sums back to bytes (presumably with unsigned saturation) */
663  "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
664  "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
665  "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
666  "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
    /* write 4 pixels back to each destination row */
667  MMI_SWC1(%[ftmp1], %[dst0], 0x00)
668  MMI_SWC1(%[ftmp2], %[dst1], 0x00)
669  MMI_SWC1(%[ftmp3], %[dst2], 0x00)
670  MMI_SWC1(%[ftmp4], %[dst3], 0x00)
    /* outputs: scratch FP registers only, all early-clobber */
671  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
672  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
673  [ftmp4]"=&f"(ftmp[4]),
675  [ftmp5]"=&f"(ftmp[5])
    /* inputs: one pointer per destination row, plus the DC value */
676  : [dst0]"r"(dst), [dst1]"r"(dst+stride),
677  [dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride),
678  [dc]"r"(dc)
679  : "memory"
680  );
681 }
682 
/*
 * DC-only variant of the 8x8 inverse transform: add one rounded value to
 * every pixel of an 8x8 block.
 *
 * dst:    top-left pixel of the destination. Eight rows of 8 bytes are read
 *         and rewritten at dst + k*stride for k = 0..7.
 * block:  only block[0] is read; it is reset to 0 before the asm runs.
 * stride: distance between destination rows, in bytes.
 *
 * The rows are handled in two groups of four (dst0..dst3, then dst4..dst7)
 * using the same instruction sequence.
 *
 * NOTE(review): rows are accessed with MMI_LDC1/MMI_SDC1 rather than an
 * unaligned variant, so dst rows are presumably expected to be 8-byte
 * aligned -- confirm against the macro definitions and the callers.
 */
683 void ff_h264_idct8_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
684 {
    /* rounded DC term: (coefficient + 32) / 64 via an arithmetic shift */
685  int dc = (block[0] + 32) >> 6;
686  double ftmp[10];
688 
    /* the coefficient has been consumed; leave the block cleared */
689  block[0] = 0;
690 
691  __asm__ volatile (
    /* ftmp0 = 0; ftmp5 = dc shuffled with an all-zero selector, presumably
     * replicating the low 16-bit lane into all four lanes */
692  "mtc1 %[dc], %[ftmp5] \n\t"
693  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
694  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
    /* rows 0-3: fetch 8 pixels per row */
695  MMI_LDC1(%[ftmp1], %[dst0], 0x00)
696  MMI_LDC1(%[ftmp2], %[dst1], 0x00)
697  MMI_LDC1(%[ftmp3], %[dst2], 0x00)
698  MMI_LDC1(%[ftmp4], %[dst3], 0x00)
    /* widen each row to 16 bits: high 4 pixels -> ftmp6..9, low 4 stay in ftmp1..4 */
699  "punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
700  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
701  "punpckhbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
702  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
703  "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t"
704  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
705  "punpckhbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
706  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
    /* pixel + dc per 16-bit lane (signed-saturating add, per the mnemonic) */
707  "paddsh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
708  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
709  "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
710  "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
711  "paddsh %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
712  "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
713  "paddsh %[ftmp9], %[ftmp9], %[ftmp5] \n\t"
714  "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
    /* recombine low and high halves into 8 bytes per row */
715  "packushb %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
716  "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
717  "packushb %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
718  "packushb %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
    /* write 8 pixels back to rows 0-3 */
719  MMI_SDC1(%[ftmp1], %[dst0], 0x00)
720  MMI_SDC1(%[ftmp2], %[dst1], 0x00)
721  MMI_SDC1(%[ftmp3], %[dst2], 0x00)
722  MMI_SDC1(%[ftmp4], %[dst3], 0x00)
723 
    /* rows 4-7: identical sequence on dst4..dst7 */
724  MMI_LDC1(%[ftmp1], %[dst4], 0x00)
725  MMI_LDC1(%[ftmp2], %[dst5], 0x00)
726  MMI_LDC1(%[ftmp3], %[dst6], 0x00)
727  MMI_LDC1(%[ftmp4], %[dst7], 0x00)
728  "punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
729  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
730  "punpckhbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
731  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
732  "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t"
733  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
734  "punpckhbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
735  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
736  "paddsh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
737  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
738  "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
739  "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
740  "paddsh %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
741  "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
742  "paddsh %[ftmp9], %[ftmp9], %[ftmp5] \n\t"
743  "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
744  "packushb %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
745  "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
746  "packushb %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
747  "packushb %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
748  MMI_SDC1(%[ftmp1], %[dst4], 0x00)
749  MMI_SDC1(%[ftmp2], %[dst5], 0x00)
750  MMI_SDC1(%[ftmp3], %[dst6], 0x00)
751  MMI_SDC1(%[ftmp4], %[dst7], 0x00)
    /* outputs: scratch FP registers only, all early-clobber */
752  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
753  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
754  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
755  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
756  [ftmp8]"=&f"(ftmp[8]),
758  [ftmp9]"=&f"(ftmp[9])
    /* inputs: one pointer per destination row, plus the DC value */
759  : [dst0]"r"(dst), [dst1]"r"(dst+stride),
760  [dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride),
761  [dst4]"r"(dst+4*stride), [dst5]"r"(dst+5*stride),
762  [dst6]"r"(dst+6*stride), [dst7]"r"(dst+7*stride),
763  [dc]"r"(dc)
764  : "memory"
765  );
766 }
767 
768 void ff_h264_idct_add16_8_mmi(uint8_t *dst, const int *block_offset,
769  int16_t *block, int stride,
770  const uint8_t nnzc[5 * 8])
771 {
772  int i;
773  for(i=0; i<16; i++){
774  int nnz = nnzc[ scan8[i] ];
775  if(nnz){
776  if(nnz==1 && ((int16_t*)block)[i*16])
777  ff_h264_idct_dc_add_8_mmi(dst + block_offset[i], block + i*16,
778  stride);
779  else
780  ff_h264_idct_add_8_mmi(dst + block_offset[i], block + i*16,
781  stride);
782  }
783  }
784 }
785 
786 void ff_h264_idct_add16intra_8_mmi(uint8_t *dst, const int *block_offset,
787  int16_t *block, int stride, const uint8_t nnzc[5 * 8])
788 {
789  int i;
790  for(i=0; i<16; i++){
791  if(nnzc[ scan8[i] ])
792  ff_h264_idct_add_8_mmi(dst + block_offset[i], block + i*16, stride);
793  else if(((int16_t*)block)[i*16])
794  ff_h264_idct_dc_add_8_mmi(dst + block_offset[i], block + i*16,
795  stride);
796  }
797 }
798 
799 void ff_h264_idct8_add4_8_mmi(uint8_t *dst, const int *block_offset,
800  int16_t *block, int stride, const uint8_t nnzc[5 * 8])
801 {
802  int i;
803  for(i=0; i<16; i+=4){
804  int nnz = nnzc[ scan8[i] ];
805  if(nnz){
806  if(nnz==1 && ((int16_t*)block)[i*16])
807  ff_h264_idct8_dc_add_8_mmi(dst + block_offset[i],
808  block + i*16, stride);
809  else
810  ff_h264_idct8_add_8_mmi(dst + block_offset[i], block + i*16,
811  stride);
812  }
813  }
814 }
815 
816 void ff_h264_idct_add8_8_mmi(uint8_t **dest, const int *block_offset,
817  int16_t *block, int stride, const uint8_t nnzc[15*8])
818 {
819  int i, j;
820  for(j=1; j<3; j++){
821  for(i=j*16; i<j*16+4; i++){
822  if(nnzc[ scan8[i] ])
823  ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i],
824  block + i*16, stride);
825  else if(((int16_t*)block)[i*16])
826  ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i],
827  block + i*16, stride);
828  }
829  }
830 }
831 
832 void ff_h264_idct_add8_422_8_mmi(uint8_t **dest, const int *block_offset,
833  int16_t *block, int stride, const uint8_t nnzc[15*8])
834 {
835  int i, j;
836 
837  for(j=1; j<3; j++){
838  for(i=j*16; i<j*16+4; i++){
839  if(nnzc[ scan8[i] ])
840  ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i],
841  block + i*16, stride);
842  else if(((int16_t*)block)[i*16])
843  ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i],
844  block + i*16, stride);
845  }
846  }
847 
848  for(j=1; j<3; j++){
849  for(i=j*16+4; i<j*16+8; i++){
850  if(nnzc[ scan8[i+4] ])
851  ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i+4],
852  block + i*16, stride);
853  else if(((int16_t*)block)[i*16])
854  ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i+4],
855  block + i*16, stride);
856  }
857  }
858 }
859 
861  int qmul)
862 {
 /*
  * NOTE(review): the first line of this function's signature is not
  * visible in this listing; the asm below uses the operands output, input
  * and qmul.
  *
  * Four 8-byte loads read sixteen 16-bit values from input. Two
  * add/subtract butterfly passes are applied, with what appears to be a
  * 4x4 halfword transpose (punpck* sequence) between them. Each result is
  * then multiplied via pmaddhw against a word built from qmul, shifted
  * right, packed back to 16 bits and stored with sh to output at byte
  * offsets 0x00, 0x20, ..., 0x1e0 (sixteen stores, 0x20 bytes apart).
  *
  * input and qmul are read-write operands and are reused as scratch
  * registers inside the asm.
  */
863  double ftmp[10];
864  uint64_t tmp[2];
866 
867  __asm__ volatile (
 /* noreorder: the instruction following each branch/jump below occupies
  * its delay slot and is executed regardless of the branch outcome. */
868  ".set noreorder \n\t"
 /* ftmp8 = 8 (psraw shift count of the first path),
  * ftmp9 = 0x20 (ssrld shift count used to reach the upper 32 bits). */
869  "dli %[tmp0], 0x08 \n\t"
870  MMI_LDC1(%[ftmp3], %[input], 0x18)
871  "mtc1 %[tmp0], %[ftmp8] \n\t"
872  MMI_LDC1(%[ftmp2], %[input], 0x10)
873  "dli %[tmp0], 0x20 \n\t"
874  MMI_LDC1(%[ftmp1], %[input], 0x08)
875  "mtc1 %[tmp0], %[ftmp9] \n\t"
876  MMI_LDC1(%[ftmp0], %[input], 0x00)
 /* First butterfly pass over the four loaded rows. */
877  "mov.d %[ftmp4], %[ftmp3] \n\t"
878  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
879  "psubh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
880  "mov.d %[ftmp4], %[ftmp1] \n\t"
881  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
882  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
883  "mov.d %[ftmp4], %[ftmp3] \n\t"
884  "paddh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
885  "psubh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
886  "mov.d %[ftmp4], %[ftmp2] \n\t"
887  "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
888  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
 /* Halfword/word interleaves (presumably a 4x4 transpose). */
889  "mov.d %[ftmp4], %[ftmp3] \n\t"
890  "punpcklhw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
891  "punpckhhw %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
892  "punpckhhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
893  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
894  "punpckhwd %[ftmp2], %[ftmp3], %[ftmp0] \n\t"
895  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
896  "mov.d %[ftmp0], %[ftmp4] \n\t"
897  "punpcklwd %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
898  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
 /* Second butterfly pass. */
899  "mov.d %[ftmp1], %[ftmp0] \n\t"
900  "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
901  "psubh %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
902  "mov.d %[ftmp1], %[ftmp2] \n\t"
903  "paddh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
904  "psubh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
905  "mov.d %[ftmp1], %[ftmp0] \n\t"
906  "paddh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
907  "psubh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
908  "mov.d %[ftmp1], %[ftmp4] \n\t"
 /* tmp0 = qmul - 0x7fff; branch to label 1 when that is positive. The
  * psubh after bgtz sits in the delay slot and runs on both paths. */
909  "daddi %[tmp0], %[qmul], -0x7fff \n\t"
910  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
911  "bgtz %[tmp0], 1f \n\t"
912  "psubh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
 /* Path for qmul <= 0x7fff: qmul += 0x80 << 16, so the 32-bit multiplier
  * word carries 0x80 in its upper halfword. Each value is interleaved with
  * ff_pw_1 (presumably halfword ones - TODO confirm) so that pmaddhw
  * yields value * qmul + 0x80, which is then shifted right by 8. */
913  "ori %[tmp0], $0, 0x80 \n\t"
914  "dsll %[tmp0], %[tmp0], 0x10 \n\t"
915  "punpckhhw %[ftmp1], %[ftmp0], %[ff_pw_1] \n\t"
916  "daddu %[qmul], %[qmul], %[tmp0] \n\t"
917  "punpcklhw %[ftmp0], %[ftmp0], %[ff_pw_1] \n\t"
918  "punpckhhw %[ftmp5], %[ftmp2], %[ff_pw_1] \n\t"
919  "punpcklhw %[ftmp2], %[ftmp2], %[ff_pw_1] \n\t"
920  "mtc1 %[qmul], %[ftmp7] \n\t"
921  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
922  "pmaddhw %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
923  "pmaddhw %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
924  "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
925  "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
926  "psraw %[ftmp0], %[ftmp0], %[ftmp8] \n\t"
927  "psraw %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
928  "psraw %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
929  "psraw %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
930  "packsswh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
931  "packsswh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
 /* Scatter the packed halfwords: tmp1 holds the low 32 bits' halfwords,
  * input (now scratch) the halfwords of the upper 32 bits. */
932  "dmfc1 %[tmp1], %[ftmp0] \n\t"
933  "ssrld %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
934  "mfc1 %[input], %[ftmp0] \n\t"
935  "sh %[tmp1], 0x00(%[output]) \n\t"
936  "sh %[input], 0x80(%[output]) \n\t"
937  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
938  PTR_SRL "%[input], %[input], 0x10 \n\t"
939  "sh %[tmp1], 0x20(%[output]) \n\t"
940  "sh %[input], 0xa0(%[output]) \n\t"
941  "dmfc1 %[tmp1], %[ftmp2] \n\t"
942  "ssrld %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
943  "mfc1 %[input], %[ftmp2] \n\t"
944  "sh %[tmp1], 0x40(%[output]) \n\t"
945  "sh %[input], 0xc0(%[output]) \n\t"
946  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
947  PTR_SRL "%[input], %[input], 0x10 \n\t"
948  "sh %[tmp1], 0x60(%[output]) \n\t"
949  "sh %[input], 0xe0(%[output]) \n\t"
 /* Same scale/shift/pack/store sequence for the remaining two rows,
  * written to offsets 0x100..0x1e0. */
950  "punpckhhw %[ftmp1], %[ftmp3], %[ff_pw_1] \n\t"
951  "punpcklhw %[ftmp3], %[ftmp3], %[ff_pw_1] \n\t"
952  "punpckhhw %[ftmp5], %[ftmp4], %[ff_pw_1] \n\t"
953  "punpcklhw %[ftmp4], %[ftmp4], %[ff_pw_1] \n\t"
954  "mtc1 %[qmul], %[ftmp7] \n\t"
955  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
956  "pmaddhw %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
957  "pmaddhw %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
958  "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
959  "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
960  "psraw %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
961  "psraw %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
962  "psraw %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
963  "psraw %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
964  "packsswh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
965  "packsswh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
966  "dmfc1 %[tmp1], %[ftmp3] \n\t"
967  "ssrld %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
968  "mfc1 %[input], %[ftmp3] \n\t"
969  "sh %[tmp1], 0x100(%[output]) \n\t"
970  "sh %[input], 0x180(%[output]) \n\t"
971  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
972  PTR_SRL "%[input], %[input], 0x10 \n\t"
973  "sh %[tmp1], 0x120(%[output]) \n\t"
974  "sh %[input], 0x1a0(%[output]) \n\t"
975  "dmfc1 %[tmp1], %[ftmp4] \n\t"
976  "ssrld %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
977  "mfc1 %[input], %[ftmp4] \n\t"
978  "sh %[tmp1], 0x140(%[output]) \n\t"
979  "sh %[input], 0x1c0(%[output]) \n\t"
980  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
981  PTR_SRL "%[input], %[input], 0x10 \n\t"
982  "sh %[tmp1], 0x160(%[output]) \n\t"
 /* Jump to the end; the final sh executes in the jump's delay slot. */
983  "j 2f \n\t"
984  "sh %[input], 0x1e0(%[output]) \n\t"
 /* Path for qmul > 0x7fff: qmul is pre-shifted right and the psraw count
  * (ftmp6) is reduced to match, so that the multiplier fits pmaddhw's
  * 16-bit operand (presumably - TODO confirm intent). */
985  "1: \n\t"
986  "ori %[tmp0], $0, 0x1f \n\t"
 /* NOTE(review): only the HAVE_LOONGSON3 branch sets tmp1 (via clz); the
  * HAVE_LOONGSON2 branch is empty, yet tmp1 is read by the dsubu below. */
987 #if HAVE_LOONGSON3
988  "clz %[tmp1], %[qmul] \n\t"
989 #elif HAVE_LOONGSON2
990 #endif
991  "ori %[input], $0, 0x07 \n\t"
992  "dsubu %[tmp1], %[tmp0], %[tmp1] \n\t"
993  "ori %[tmp0], $0, 0x80 \n\t"
994  "dsll %[tmp0], %[tmp0], 0x10 \n\t"
995  "daddu %[qmul], %[qmul], %[tmp0] \n\t"
996  "dsubu %[tmp0], %[tmp1], %[input] \n\t"
997  "movn %[tmp1], %[input], %[tmp0] \n\t"
998  PTR_ADDIU "%[input], %[input], 0x01 \n\t"
999  "andi %[tmp0], %[tmp1], 0xff \n\t"
1000  "srlv %[qmul], %[qmul], %[tmp0] \n\t"
1001  PTR_SUBU "%[input], %[input], %[tmp1] \n\t"
 /* ftmp6 = 8 - tmp1: the psraw shift count for this path. */
1002  "mtc1 %[input], %[ftmp6] \n\t"
1003  "punpckhhw %[ftmp1], %[ftmp0], %[ff_pw_1] \n\t"
1004  "punpcklhw %[ftmp0], %[ftmp0], %[ff_pw_1] \n\t"
1005  "punpckhhw %[ftmp5], %[ftmp2], %[ff_pw_1] \n\t"
1006  "punpcklhw %[ftmp2], %[ftmp2], %[ff_pw_1] \n\t"
1007  "mtc1 %[qmul], %[ftmp7] \n\t"
1008  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1009  "pmaddhw %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1010  "pmaddhw %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1011  "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1012  "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1013  "psraw %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
1014  "psraw %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1015  "psraw %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1016  "psraw %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1017  "packsswh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1018  "packsswh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
 /* Stores to offsets 0x00..0xe0, same layout as the first path. */
1019  "dmfc1 %[tmp1], %[ftmp0] \n\t"
1020  "ssrld %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
1021  "sh %[tmp1], 0x00(%[output]) \n\t"
1022  "mfc1 %[input], %[ftmp0] \n\t"
1023  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
1024  "sh %[input], 0x80(%[output]) \n\t"
1025  "sh %[tmp1], 0x20(%[output]) \n\t"
1026  PTR_SRL "%[input], %[input], 0x10 \n\t"
1027  "dmfc1 %[tmp1], %[ftmp2] \n\t"
1028  "sh %[input], 0xa0(%[output]) \n\t"
1029  "ssrld %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
1030  "sh %[tmp1], 0x40(%[output]) \n\t"
1031  "mfc1 %[input], %[ftmp2] \n\t"
1032  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
1033  "sh %[input], 0xc0(%[output]) \n\t"
1034  "sh %[tmp1], 0x60(%[output]) \n\t"
1035  PTR_SRL "%[input], %[input], 0x10 \n\t"
1036  "sh %[input], 0xe0(%[output]) \n\t"
 /* Remaining two rows, stored to offsets 0x100..0x1e0. */
1037  "punpckhhw %[ftmp1], %[ftmp3], %[ff_pw_1] \n\t"
1038  "punpcklhw %[ftmp3], %[ftmp3], %[ff_pw_1] \n\t"
1039  "punpckhhw %[ftmp5], %[ftmp4], %[ff_pw_1] \n\t"
1040  "punpcklhw %[ftmp4], %[ftmp4], %[ff_pw_1] \n\t"
1041  "mtc1 %[qmul], %[ftmp7] \n\t"
1042  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1043  "pmaddhw %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1044  "pmaddhw %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1045  "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1046  "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1047  "psraw %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
1048  "psraw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1049  "psraw %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1050  "psraw %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1051  "packsswh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
1052  "packsswh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1053  "dmfc1 %[tmp1], %[ftmp3] \n\t"
1054  "ssrld %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
1055  "mfc1 %[input], %[ftmp3] \n\t"
1056  "sh %[tmp1], 0x100(%[output]) \n\t"
1057  "sh %[input], 0x180(%[output]) \n\t"
1058  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
1059  PTR_SRL "%[input], %[input], 0x10 \n\t"
1060  "sh %[tmp1], 0x120(%[output]) \n\t"
1061  "sh %[input], 0x1a0(%[output]) \n\t"
1062  "dmfc1 %[tmp1], %[ftmp4] \n\t"
1063  "ssrld %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
1064  "mfc1 %[input], %[ftmp4] \n\t"
1065  "sh %[tmp1], 0x140(%[output]) \n\t"
1066  "sh %[input], 0x1c0(%[output]) \n\t"
1067  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
1068  PTR_SRL "%[input], %[input], 0x10 \n\t"
1069  "sh %[tmp1], 0x160(%[output]) \n\t"
1070  "sh %[input], 0x1e0(%[output]) \n\t"
1071  "2: \n\t"
1072  ".set reorder \n\t"
1073  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1074  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1075  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1076  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1077  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1078  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
 /* output, input and qmul are read-write ("+&r"): the asm overwrites
  * input and qmul. */
1080  [output]"+&r"(output), [input]"+&r"(input),
1081  [qmul]"+&r"(qmul)
1082  : [ff_pw_1]"f"(ff_pw_1.f)
1083  : "memory"
1084  );
1085 }
1086 
/**
 * Weight a block 16 bytes wide in place, one row per loop iteration.
 *
 * offset is first scaled by 1 << log2_denom and, when log2_denom is
 * non-zero, increased by 1 << (log2_denom - 1). For every row the 16 bytes
 * at block are widened to 16-bit lanes, multiplied by weight (pmullh),
 * summed with offset using a signed saturating add (paddsh), shifted right
 * arithmetically by log2_denom (psrah) and packed back to bytes with
 * unsigned saturation (packushb) before being stored to the same address.
 *
 * @param block      first row of the block; read and overwritten
 * @param stride     byte distance from one row to the next
 * @param height     number of rows processed
 * @param log2_denom right-shift applied to each weighted sum
 * @param weight     multiplier, broadcast to the 16-bit lanes
 * @param offset     additive term, scaled as described above
 */
1087 void ff_h264_weight_pixels16_8_mmi(uint8_t *block, ptrdiff_t stride, int height,
1088  int log2_denom, int weight, int offset)
1089 {
1090  int y;
1091  double ftmp[8];
1093 
1094  offset <<= log2_denom;
1095 
 /* Rounding term: half of the divisor, only when a shift takes place. */
1096  if (log2_denom)
1097  offset += 1 << (log2_denom - 1);
1098 
1099  for (y=0; y<height; y++, block+=stride) {
1100  __asm__ volatile (
 /* ftmp0 = 0: used as the zero half for byte widening and as the pshufh
  * selector (presumably replicating halfword 0 into all lanes). */
1101  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
 /* ftmp1 = bytes 0..7 of the row, ftmp2 = bytes 8..15. */
1102  MMI_LDC1(%[ftmp1], %[block0], 0x00)
1103  MMI_LDC1(%[ftmp2], %[block1], 0x00)
1104  "mtc1 %[weight], %[ftmp3] \n\t"
1105  "mtc1 %[offset], %[ftmp4] \n\t"
1106  "mtc1 %[log2_denom], %[ftmp5] \n\t"
1107  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1108  "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
 /* Widen bytes to halfwords: high halves into ftmp6/ftmp7, low halves
  * stay in ftmp1/ftmp2. */
1109  "punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
1110  "punpckhbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
1111  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1112  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1113  "pmullh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1114  "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1115  "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1116  "pmullh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
1117  "paddsh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1118  "paddsh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
1119  "paddsh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
1120  "paddsh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1121  "psrah %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1122  "psrah %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1123  "psrah %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1124  "psrah %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
 /* Pack low/high halves back into 8 bytes each and store in place. */
1125  "packushb %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1126  "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1127  MMI_SDC1(%[ftmp1], %[block0], 0x00)
1128  MMI_SDC1(%[ftmp2], %[block1], 0x00)
1129  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1130  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1131  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1132  [ftmp6]"=&f"(ftmp[6]),
1134  [ftmp7]"=&f"(ftmp[7])
1135  : [block0]"r"(block), [block1]"r"(block+8),
1136  [weight]"r"(weight), [offset]"r"(offset),
1137  [log2_denom]"r"(log2_denom)
1138  : "memory"
1139  );
1140  }
1141 }
1142 
/**
 * Blend two blocks 16 bytes wide with separate weights, writing to dst.
 *
 * offset is replaced by ((offset + 1) | 1) << log2_denom. For every row and
 * every byte position the code computes, in 16-bit lanes,
 *     (src * weights + offset + dst * weightd) >> (log2_denom + 1)
 * using signed saturating adds (paddsh) and an arithmetic shift (psrah),
 * then packs the result to bytes with unsigned saturation (packushb) and
 * stores it to dst. The row is handled as two 8-byte halves.
 *
 * @param dst        first row of the destination; read and overwritten
 * @param src        first row of the second source; only read
 * @param stride     byte distance between rows, applied to both pointers
 * @param height     number of rows processed
 * @param log2_denom the asm shifts by log2_denom + 1
 * @param weightd    multiplier for the dst samples
 * @param weights    multiplier for the src samples
 * @param offset     additive term, transformed as described above
 */
1143 void ff_h264_biweight_pixels16_8_mmi(uint8_t *dst, uint8_t *src,
1144  ptrdiff_t stride, int height, int log2_denom, int weightd, int weights,
1145  int offset)
1146 {
1147  int y;
1148  double ftmp[9];
1150 
1151  offset = ((offset + 1) | 1) << log2_denom;
1152 
1153  for (y=0; y<height; y++, dst+=stride, src+=stride) {
1154  __asm__ volatile (
 /* ftmp0 = 0: zero half for widening and pshufh selector. */
1155  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
 /* First half of the row: bytes 0..7 of src and dst. */
1156  MMI_LDC1(%[ftmp1], %[src0], 0x00)
1157  MMI_LDC1(%[ftmp2], %[dst0], 0x00)
1158  "mtc1 %[weights], %[ftmp3] \n\t"
1159  "mtc1 %[weightd], %[ftmp4] \n\t"
1160  "mtc1 %[offset], %[ftmp5] \n\t"
1161  "mtc1 %[log2_denom], %[ftmp6] \n\t"
1162  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1163  "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1164  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1165  "punpckhbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
1166  "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
1167  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1168  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
 /* src lanes * weights, dst lanes * weightd. */
1169  "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1170  "pmullh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
1171  "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1172  "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
 /* Add offset first, then the weighted dst lanes. */
1173  "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1174  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1175  "paddsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1176  "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1177  "psrah %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1178  "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1179  "packushb %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1180  MMI_SDC1(%[ftmp1], %[dst0], 0x00)
 /* Second half of the row: bytes 8..15, same computation; the broadcast
  * weights, offset and shift count are reused. */
1181  MMI_LDC1(%[ftmp1], %[src1], 0x00)
1182  MMI_LDC1(%[ftmp2], %[dst1], 0x00)
1183  "punpckhbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
1184  "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
1185  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1186  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1187  "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1188  "pmullh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
1189  "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1190  "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1191  "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1192  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1193  "paddsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1194  "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1195  "psrah %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1196  "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1197  "packushb %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1198  MMI_SDC1(%[ftmp1], %[dst1], 0x00)
1199  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1200  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1201  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1202  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1204  [ftmp8]"=&f"(ftmp[8])
1205  : [dst0]"r"(dst), [dst1]"r"(dst+8),
1206  [src0]"r"(src), [src1]"r"(src+8),
1207  [weights]"r"(weights), [weightd]"r"(weightd),
 /* The shift operand is log2_denom + 1, not log2_denom. */
1208  [offset]"r"(offset), [log2_denom]"r"(log2_denom+1)
1209  : "memory"
1210  );
1211  }
1212 }
1213 
/**
 * Weight a block 8 bytes wide in place, one row per loop iteration.
 *
 * offset is first scaled by 1 << log2_denom and, when log2_denom is
 * non-zero, increased by 1 << (log2_denom - 1). For every row the 8 bytes
 * at block are widened to 16-bit lanes, multiplied by weight (pmullh),
 * summed with offset using a signed saturating add (paddsh), shifted right
 * arithmetically by log2_denom (psrah), packed to bytes with unsigned
 * saturation (packushb) and stored back to block.
 *
 * @param block      first row of the block; read and overwritten
 * @param stride     byte distance from one row to the next
 * @param height     number of rows processed
 * @param log2_denom right-shift applied to each weighted sum
 * @param weight     multiplier, broadcast to the 16-bit lanes
 * @param offset     additive term, scaled as described above
 */
1214 void ff_h264_weight_pixels8_8_mmi(uint8_t *block, ptrdiff_t stride, int height,
1215  int log2_denom, int weight, int offset)
1216 {
1217  int y;
1218  double ftmp[6];
1220 
1221  offset <<= log2_denom;
1222 
 /* Rounding term: half of the divisor, only when a shift takes place. */
1223  if (log2_denom)
1224  offset += 1 << (log2_denom - 1);
1225 
1226  for (y=0; y<height; y++, block+=stride) {
1227  __asm__ volatile (
 /* ftmp0 = 0: zero half for widening and pshufh selector. */
1228  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1229  MMI_LDC1(%[ftmp1], %[block], 0x00)
1230  "mtc1 %[weight], %[ftmp2] \n\t"
1231  "mtc1 %[offset], %[ftmp3] \n\t"
1232  "mtc1 %[log2_denom], %[ftmp5] \n\t"
1233  "pshufh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1234  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
 /* High four bytes widen into ftmp4, low four stay in ftmp1. */
1235  "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t"
1236  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1237  "pmullh %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
1238  "pmullh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1239  "paddsh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
1240  "paddsh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1241  "psrah %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1242  "psrah %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1243  "packushb %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
1244  MMI_SDC1(%[ftmp1], %[block], 0x00)
1245  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1246  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1247  [ftmp4]"=&f"(ftmp[4]),
1249  [ftmp5]"=&f"(ftmp[5])
1250  : [block]"r"(block), [weight]"r"(weight),
1251  [offset]"r"(offset), [log2_denom]"r"(log2_denom)
1252  : "memory"
1253  );
1254  }
1255 }
1256 
/**
 * Blend two blocks 8 bytes wide with separate weights, writing to dst.
 *
 * offset is replaced by ((offset + 1) | 1) << log2_denom. For every row and
 * every byte position the code computes, in 16-bit lanes,
 *     (src * weights + offset + dst * weightd) >> (log2_denom + 1)
 * using signed saturating adds (paddsh) and an arithmetic shift (psrah),
 * then packs the result to bytes with unsigned saturation (packushb) and
 * stores it to dst.
 *
 * @param dst        first row of the destination; read and overwritten
 * @param src        first row of the second source; only read
 * @param stride     byte distance between rows, applied to both pointers
 * @param height     number of rows processed
 * @param log2_denom the asm shifts by log2_denom + 1
 * @param weightd    multiplier for the dst samples
 * @param weights    multiplier for the src samples
 * @param offset     additive term, transformed as described above
 */
1257 void ff_h264_biweight_pixels8_8_mmi(uint8_t *dst, uint8_t *src,
1258  ptrdiff_t stride, int height, int log2_denom, int weightd, int weights,
1259  int offset)
1260 {
1261  int y;
1262  double ftmp[9];
1264 
1265  offset = ((offset + 1) | 1) << log2_denom;
1266 
1267  for (y=0; y<height; y++, dst+=stride, src+=stride) {
1268  __asm__ volatile (
 /* ftmp0 = 0: zero half for widening and pshufh selector. */
1269  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1270  MMI_LDC1(%[ftmp1], %[src], 0x00)
1271  MMI_LDC1(%[ftmp2], %[dst], 0x00)
1272  "mtc1 %[weights], %[ftmp3] \n\t"
1273  "mtc1 %[weightd], %[ftmp4] \n\t"
1274  "mtc1 %[offset], %[ftmp5] \n\t"
1275  "mtc1 %[log2_denom], %[ftmp6] \n\t"
1276  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1277  "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1278  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1279  "punpckhbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
1280  "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
1281  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1282  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
 /* src lanes * weights, dst lanes * weightd. */
1283  "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1284  "pmullh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
1285  "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1286  "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
 /* Add offset first, then the weighted dst lanes. */
1287  "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1288  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1289  "paddsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1290  "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1291  "psrah %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1292  "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1293  "packushb %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1294  MMI_SDC1(%[ftmp1], %[dst], 0x00)
1295  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1296  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1297  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1298  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1300  [ftmp8]"=&f"(ftmp[8])
1301  : [dst]"r"(dst), [src]"r"(src),
1302  [weights]"r"(weights), [weightd]"r"(weightd),
 /* The shift operand is log2_denom + 1, not log2_denom. */
1303  [offset]"r"(offset), [log2_denom]"r"(log2_denom+1)
1304  : "memory"
1305  );
1306  }
1307 }
1308 
/**
 * Weight a block 4 bytes wide in place, one row per loop iteration.
 *
 * offset is first scaled by 1 << log2_denom and, when log2_denom is
 * non-zero, increased by 1 << (log2_denom - 1). For every row a 32-bit
 * value is loaded from block with MMI_ULWC1 (presumably an unaligned word
 * load - macro defined elsewhere), widened to four 16-bit lanes,
 * multiplied by weight (pmullh), summed with offset using a signed
 * saturating add (paddsh), shifted right arithmetically by log2_denom
 * (psrah), packed to bytes with unsigned saturation (packushb) and written
 * back with MMI_SWC1.
 *
 * @param block      first row of the block; read and overwritten
 * @param stride     byte distance from one row to the next
 * @param height     number of rows processed
 * @param log2_denom right-shift applied to each weighted sum
 * @param weight     multiplier, broadcast to the 16-bit lanes
 * @param offset     additive term, scaled as described above
 */
1309 void ff_h264_weight_pixels4_8_mmi(uint8_t *block, ptrdiff_t stride, int height,
1310  int log2_denom, int weight, int offset)
1311 {
1312  int y;
1313  double ftmp[5];
1315 
1316  offset <<= log2_denom;
1317 
 /* Rounding term: half of the divisor, only when a shift takes place. */
1318  if (log2_denom)
1319  offset += 1 << (log2_denom - 1);
1320 
1321  for (y=0; y<height; y++, block+=stride) {
1322  __asm__ volatile (
 /* ftmp0 = 0: zero half for widening, pshufh selector and the upper half
  * of the final pack. */
1323  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1324  MMI_ULWC1(%[ftmp1], %[block], 0x00)
1325  "mtc1 %[weight], %[ftmp2] \n\t"
1326  "mtc1 %[offset], %[ftmp3] \n\t"
1327  "mtc1 %[log2_denom], %[ftmp4] \n\t"
1328  "pshufh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1329  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1330  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1331  "pmullh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1332  "paddsh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1333  "psrah %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
1334  "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1335  MMI_SWC1(%[ftmp1], %[block], 0x00)
1336  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1337  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1339  [ftmp4]"=&f"(ftmp[4])
1340  : [block]"r"(block), [weight]"r"(weight),
1341  [offset]"r"(offset), [log2_denom]"r"(log2_denom)
1342  : "memory"
1343  );
1344  }
1345 }
1346 
/**
 * Blend two blocks 4 bytes wide with separate weights, writing to dst.
 *
 * offset is replaced by ((offset + 1) | 1) << log2_denom. For every row and
 * every byte position the code computes, in 16-bit lanes,
 *     (src * weights + offset + dst * weightd) >> (log2_denom + 1)
 * using signed saturating adds (paddsh) and an arithmetic shift (psrah),
 * then packs the result to bytes with unsigned saturation (packushb) and
 * stores 32 bits to dst with MMI_SWC1.
 *
 * @param dst        first row of the destination; read and overwritten
 * @param src        first row of the second source; only read
 * @param stride     byte distance between rows, applied to both pointers
 * @param height     number of rows processed
 * @param log2_denom the asm shifts by log2_denom + 1
 * @param weightd    multiplier for the dst samples
 * @param weights    multiplier for the src samples
 * @param offset     additive term, transformed as described above
 */
1347 void ff_h264_biweight_pixels4_8_mmi(uint8_t *dst, uint8_t *src,
1348  ptrdiff_t stride, int height, int log2_denom, int weightd, int weights,
1349  int offset)
1350 {
1351  int y;
1352  double ftmp[7];
1354 
1355  offset = ((offset + 1) | 1) << log2_denom;
1356 
1357  for (y=0; y<height; y++, dst+=stride, src+=stride) {
1358  __asm__ volatile (
 /* ftmp0 = 0: zero half for widening, pshufh selector and the upper half
  * of the final pack. */
1359  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1360  MMI_ULWC1(%[ftmp1], %[src], 0x00)
1361  MMI_ULWC1(%[ftmp2], %[dst], 0x00)
 /* The asm operand named "weight" is bound to the C parameter weights
  * in the input list below. */
1362  "mtc1 %[weight], %[ftmp3] \n\t"
1363  "mtc1 %[weightd], %[ftmp4] \n\t"
1364  "mtc1 %[offset], %[ftmp5] \n\t"
1365  "mtc1 %[log2_denom], %[ftmp6] \n\t"
1366  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1367  "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1368  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1369  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1370  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1371  "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1372  "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
 /* Add offset first, then the weighted dst lanes. */
1373  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1374  "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1375  "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1376  "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1377  MMI_SWC1(%[ftmp1], %[dst], 0x00)
1378  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1379  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1380  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1382  [ftmp6]"=&f"(ftmp[6])
1383  : [dst]"r"(dst), [src]"r"(src),
1384  [weight]"r"(weights), [weightd]"r"(weightd),
 /* The shift operand is log2_denom + 1, not log2_denom. */
1385  [offset]"r"(offset), [log2_denom]"r"(log2_denom+1)
1386  : "memory"
1387  );
1388  }
1389 }
1390 
/**
 * Filter 8 columns across the horizontal edge located at pix.
 *
 * Eight bytes are read from each of the six rows pix - 3*stride through
 * pix + 2*stride; the four rows pix - 2*stride through pix + stride are
 * written back. A per-byte enable mask is derived from alpha - 1, beta - 1
 * and the differences between neighbouring rows, combined with a signed
 * "greater than -1" test on the expanded tc0 bytes.
 *
 * Four bytes are loaded from tc0; after the two punpcklbh steps it appears
 * that only tc0[0] and tc0[1] are used, each replicated over four columns
 * (TODO confirm against the MMI punpcklbh definition).
 *
 * NOTE(review): alpha and beta are listed as input-only ("r") operands, yet
 * the asm decrements both registers with addi. GCC's extended-asm rules do
 * not allow modifying input-only operands.
 *
 * @param pix    first row below the edge (8 bytes wide)
 * @param stride byte distance between rows
 * @param alpha  threshold compared (as alpha - 1) with the difference
 *               between rows pix - stride and pix
 * @param beta   threshold compared (as beta - 1) with the other row
 *               differences
 * @param tc0    per-group clipping values (signed bytes)
 */
1391 void ff_deblock_v8_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
1392  int8_t *tc0)
1393 {
1394  double ftmp[12];
1395  mips_reg addr[2];
1399 
1400  __asm__ volatile (
 /* addr0 = 2 * stride; addr1 = pix - 3 * stride; alpha and beta are each
  * decremented by one. ftmp0 = 0. */
1401  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
1402  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1403  PTR_ADDU "%[addr1], %[stride], %[addr0] \n\t"
1404  "addi %[alpha], %[alpha], -0x01 \n\t"
1405  PTR_SUBU "%[addr1], $0, %[addr1] \n\t"
1406  "addi %[beta], %[beta], -0x01 \n\t"
1407  PTR_ADDU "%[addr1], %[addr1], %[pix] \n\t"
 /* Row loads (assuming MMI_LDXC1 addresses base + index):
  * ftmp3 = row pix, ftmp1 = row pix - 2*stride,
  * ftmp2 = row pix - stride, ftmp4 = row pix + stride. */
1408  MMI_LDC1(%[ftmp3], %[pix], 0x00)
1409  MMI_LDXC1(%[ftmp1], %[addr1], %[stride], 0x00)
1410  MMI_LDXC1(%[ftmp2], %[addr1], %[addr0], 0x00)
1411  MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
 /* Broadcast alpha - 1 and beta - 1 to every byte (packushb saturates the
  * 16-bit lanes to 0..255). */
1412  "mtc1 %[alpha], %[ftmp5] \n\t"
1413  "mtc1 %[beta], %[ftmp6] \n\t"
1414  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1415  "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1416  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1417  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
 /* Build ftmp8: saturating absolute differences minus the thresholds are
  * OR-ed together and compared with zero, so a byte becomes 0xff only when
  * |ftmp2 - ftmp3| <= alpha - 1, |ftmp1 - ftmp2| <= beta - 1 and
  * |ftmp4 - ftmp3| <= beta - 1. */
1418  "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t"
1419  "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1420  "por %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1421  "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
1422  "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1423  "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1424  "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1425  "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1426  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1427  "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1428  "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t"
1429  "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1430  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1431  "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1432  "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
 /* ftmp4 = all ones (-1 per byte); ftmp9 = expanded tc0 bytes;
  * ftmp5 = (ftmp9 > -1) per signed byte; ftmp10 = ftmp5 & ftmp8. */
1433  "pcmpeqb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
1434  MMI_ULWC1(%[ftmp5], %[tc0], 0x00)
1435  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1436  "punpcklbh %[ftmp9], %[ftmp5], %[ftmp5] \n\t"
1437  "pcmpgtb %[ftmp5], %[ftmp9], %[ftmp4] \n\t"
 /* ftmp4 = row pix - 3*stride. */
1438  MMI_LDC1(%[ftmp4], %[addr1], 0x00)
1439  "pand %[ftmp10], %[ftmp5], %[ftmp8] \n\t"
1440  "psubusb %[ftmp8], %[ftmp4], %[ftmp2] \n\t"
1441  "psubusb %[ftmp7], %[ftmp2], %[ftmp4] \n\t"
1442  "psubusb %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1443  "psubusb %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1444  "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1445  "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1446  "pand %[ftmp5], %[ftmp10], %[ftmp9] \n\t"
1447  "psubb %[ftmp8], %[ftmp5], %[ftmp7] \n\t"
1448  "pand %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1449  "pavgb %[ftmp5], %[ftmp2], %[ftmp3] \n\t"
1450  MMI_LDC1(%[ftmp11], %[addr1], 0x00)
1451  "pavgb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1452  "pxor %[ftmp5], %[ftmp5], %[ftmp11] \n\t"
1453  "pand %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t"
1454  "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
 /* Clamp the candidate to [ftmp1 - ftmp7, ftmp1 + ftmp7] (saturating) and
  * store it as the new row pix - 2*stride. */
1455  "psubusb %[ftmp5], %[ftmp1], %[ftmp7] \n\t"
1456  "paddusb %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
1457  "pmaxub %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1458  "pminub %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1459  MMI_SDXC1(%[ftmp4], %[addr1], %[stride], 0x00)
 /* ftmp5 = row pix + 2*stride; mirror of the sequence above for the rows
  * below the edge. */
1460  MMI_LDXC1(%[ftmp5], %[pix], %[addr0], 0x00)
1461  "psubusb %[ftmp4], %[ftmp5], %[ftmp3] \n\t"
1462  "psubusb %[ftmp7], %[ftmp3], %[ftmp5] \n\t"
1463  "psubusb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1464  "psubusb %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1465  "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
1466  "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1467  "psubb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1468  "pand %[ftmp6], %[ftmp9], %[ftmp7] \n\t"
1469  MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1470  "pavgb %[ftmp7], %[ftmp2], %[ftmp3] \n\t"
1471  MMI_LDXC1(%[ftmp11], %[pix], %[addr0], 0x00)
1472  "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1473  "pxor %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
1474  "pand %[ftmp7], %[ftmp7], %[ff_pb_1] \n\t"
1475  "psubusb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
 /* Clamp to [ftmp4 - ftmp6, ftmp4 + ftmp6] and store as the new row
  * pix + stride. */
1476  "psubusb %[ftmp7], %[ftmp4], %[ftmp6] \n\t"
1477  "paddusb %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1478  "pmaxub %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1479  "pminub %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1480  MMI_SDXC1(%[ftmp5], %[pix], %[stride], 0x00)
 /* Compute the correction for the two rows adjacent to the edge: its
  * negative and positive parts (ftmp7, ftmp4) are limited by ftmp8 and
  * applied with saturating adds/subtracts to ftmp2 and ftmp3. */
1481  "pxor %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1482  "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1483  "pand %[ftmp6], %[ftmp6], %[ff_pb_1] \n\t"
1484  "pxor %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1485  "pxor %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
1486  "pavgb %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
1487  "pavgb %[ftmp4], %[ftmp4], %[ff_pb_3] \n\t"
1488  "pavgb %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
1489  "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1490  "paddusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1491  "psubusb %[ftmp7], %[ff_pb_A1], %[ftmp4] \n\t"
1492  "psubusb %[ftmp4], %[ftmp4], %[ff_pb_A1] \n\t"
1493  "pminub %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1494  "pminub %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
1495  "psubusb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1496  "psubusb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1497  "paddusb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1498  "paddusb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
 /* Store the new rows pix - stride and pix. */
1499  MMI_SDXC1(%[ftmp2], %[addr1], %[addr0], 0x00)
1500  MMI_SDC1(%[ftmp3], %[pix], 0x00)
1501  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1502  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1503  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1504  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1505  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1506  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
1510  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
1511  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
1512  [alpha]"r"((mips_reg)alpha), [beta]"r"((mips_reg)beta),
1513  [tc0]"r"(tc0), [ff_pb_1]"f"(ff_pb_1.f),
1514  [ff_pb_3]"f"(ff_pb_3.f), [ff_pb_A1]"f"(ff_pb_A1.f)
1515  : "memory"
1516  );
1517 }
1518 
1519 static void deblock_v8_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
1520  int beta)
1521 {
1522  DECLARE_ALIGNED(8, const uint64_t, stack[0x0a]);
1523  double ftmp[16];
1524  uint64_t tmp[1];
1525  mips_reg addr[3];
1528 
1529  __asm__ volatile (
1530  "ori %[tmp0], $0, 0x01 \n\t"
1531  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1532  "mtc1 %[tmp0], %[ftmp9] \n\t"
1533  PTR_SLL "%[addr0], %[stride], 0x02 \n\t"
1534  PTR_ADDU "%[addr2], %[stride], %[stride] \n\t"
1535  PTR_ADDIU "%[alpha], %[alpha], -0x01 \n\t"
1536  "sslld %[ftmp11], %[ftmp9], %[ftmp9] \n\t"
1537  "bltz %[alpha], 1f \n\t"
1538  PTR_ADDU "%[addr1], %[addr2], %[stride] \n\t"
1539  PTR_ADDIU "%[beta], %[beta], -0x01 \n\t"
1540  "bltz %[beta], 1f \n\t"
1541  PTR_SUBU "%[addr0], $0, %[addr0] \n\t"
1542  PTR_ADDU "%[addr0], %[addr0], %[pix] \n\t"
1543  MMI_LDC1(%[ftmp3], %[pix], 0x00)
1544  MMI_LDXC1(%[ftmp1], %[addr0], %[addr2], 0x00)
1545  MMI_LDXC1(%[ftmp2], %[addr0], %[addr1], 0x00)
1546  MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1547  "mtc1 %[alpha], %[ftmp5] \n\t"
1548  "mtc1 %[beta], %[ftmp6] \n\t"
1549  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1550  "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1551  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1552  "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t"
1553  "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1554  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1555  "por %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1556  MMI_SDC1(%[ftmp5], %[stack], 0x10)
1557  "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1558  "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
1559  "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1560  "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1561  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1562  "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1563  "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1564  "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t"
1565  "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1566  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1567  "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1568  "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1569  MMI_LDC1(%[ftmp5], %[stack], 0x10)
1570  "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1571  "ldc1 %[ftmp10], %[ff_pb_1] \n\t"
1572  MMI_SDC1(%[ftmp8], %[stack], 0x20)
1573  "pavgb %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1574  "psubusb %[ftmp8], %[ftmp3], %[ftmp2] \n\t"
1575  "pavgb %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
1576  "psubusb %[ftmp7], %[ftmp2], %[ftmp3] \n\t"
1577  "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1578  "psubusb %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1579  MMI_LDC1(%[ftmp15], %[stack], 0x20)
1580  "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1581  "pand %[ftmp7], %[ftmp7], %[ftmp15] \n\t"
1582  MMI_LDXC1(%[ftmp15], %[addr0], %[stride], 0x00)
1583  "psubusb %[ftmp8], %[ftmp15], %[ftmp2] \n\t"
1584  "psubusb %[ftmp5], %[ftmp2], %[ftmp15] \n\t"
1585  "psubusb %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1586  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1587  "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
1588  "pand %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1589  MMI_LDXC1(%[ftmp14], %[pix], %[addr2], 0x00)
1590  MMI_SDC1(%[ftmp5], %[stack], 0x30)
1591  "psubusb %[ftmp8], %[ftmp14], %[ftmp3] \n\t"
1592  "psubusb %[ftmp5], %[ftmp3], %[ftmp14] \n\t"
1593  "psubusb %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1594  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1595  "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
1596  "pand %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1597  MMI_SDC1(%[ftmp5], %[stack], 0x40)
1598  "pavgb %[ftmp5], %[ftmp15], %[ftmp1] \n\t"
1599  "pavgb %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1600  "pavgb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1601  MMI_SDC1(%[ftmp6], %[stack], 0x10)
1602  "paddb %[ftmp7], %[ftmp15], %[ftmp1] \n\t"
1603  "paddb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1604  "paddb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1605  "mov.d %[ftmp8], %[ftmp7] \n\t"
1606  MMI_SDC1(%[ftmp7], %[stack], 0x00)
1607  "psrlh %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
1608  "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1609  "pxor %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1610  "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1611  "psubb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1612  "pavgb %[ftmp6], %[ftmp15], %[ftmp4] \n\t"
1613  "psubb %[ftmp7], %[ftmp15], %[ftmp4] \n\t"
1614  "paddb %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
1615  "psubb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1616  "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1617  "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1618  MMI_LDC1(%[ftmp13], %[stack], 0x10)
1619  "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1620  "psrlh %[ftmp8], %[ftmp8], %[ftmp11] \n\t"
1621  "pavgb %[ftmp6], %[ftmp6], %[ftmp13] \n\t"
1622  "pavgb %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
1623  "pxor %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1624  "pand %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1625  "psubb %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1626  "pxor %[ftmp8], %[ftmp2], %[ftmp4] \n\t"
1627  "pavgb %[ftmp7], %[ftmp2], %[ftmp4] \n\t"
1628  "pand %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1629  "psubb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1630  MMI_LDC1(%[ftmp13], %[stack], 0x30)
1631  "pavgb %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
1632  MMI_LDC1(%[ftmp12], %[stack], 0x20)
1633  "pxor %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1634  "pxor %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
1635  "pand %[ftmp6], %[ftmp6], %[ftmp13] \n\t"
1636  "pand %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
1637  "pxor %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1638  "pxor %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1639  MMI_SDXC1(%[ftmp6], %[addr0], %[addr1], 0x00)
1640  MMI_LDC1(%[ftmp6], %[addr0], 0x00)
1641  "paddb %[ftmp7], %[ftmp15], %[ftmp6] \n\t"
1642  "pavgb %[ftmp6], %[ftmp6], %[ftmp15] \n\t"
1643  MMI_LDC1(%[ftmp12], %[stack], 0x00)
1644  "pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1645  "paddb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1646  "paddb %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
1647  "psrlh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
1648  "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1649  "pxor %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1650  "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1651  MMI_LDC1(%[ftmp12], %[stack], 0x30)
1652  "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1653  "pxor %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
1654  "pxor %[ftmp6], %[ftmp6], %[ftmp15] \n\t"
1655  "pand %[ftmp5], %[ftmp5], %[ftmp12] \n\t"
1656  "pand %[ftmp6], %[ftmp6], %[ftmp12] \n\t"
1657  "pxor %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
1658  "pxor %[ftmp6], %[ftmp6], %[ftmp15] \n\t"
1659  MMI_SDXC1(%[ftmp5], %[addr0], %[addr2], 0x00)
1660  MMI_SDXC1(%[ftmp6], %[addr0], %[stride], 0x00)
1661  "pavgb %[ftmp5], %[ftmp14], %[ftmp4] \n\t"
1662  "pavgb %[ftmp6], %[ftmp3], %[ftmp2] \n\t"
1663  "pavgb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1664  MMI_SDC1(%[ftmp6], %[stack], 0x10)
1665  "paddb %[ftmp7], %[ftmp14], %[ftmp4] \n\t"
1666  "paddb %[ftmp8], %[ftmp3], %[ftmp2] \n\t"
1667  "paddb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1668  "mov.d %[ftmp8], %[ftmp7] \n\t"
1669  MMI_SDC1(%[ftmp7], %[stack], 0x00)
1670  "psrlh %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
1671  "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1672  "pxor %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1673  "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1674  "psubb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1675  "pavgb %[ftmp6], %[ftmp14], %[ftmp1] \n\t"
1676  "paddb %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
1677  "psubb %[ftmp7], %[ftmp14], %[ftmp1] \n\t"
1678  "psubb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1679  "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1680  "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1681  MMI_LDC1(%[ftmp12], %[stack], 0x10)
1682  "pavgb %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1683  "pavgb %[ftmp6], %[ftmp6], %[ftmp12] \n\t"
1684  "psrlh %[ftmp8], %[ftmp8], %[ftmp11] \n\t"
1685  "pavgb %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
1686  "pxor %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1687  "pand %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1688  "psubb %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1689  "pxor %[ftmp8], %[ftmp3], %[ftmp1] \n\t"
1690  "pavgb %[ftmp7], %[ftmp3], %[ftmp1] \n\t"
1691  "pand %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1692  MMI_LDC1(%[ftmp12], %[stack], 0x40)
1693  "psubb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1694  MMI_LDC1(%[ftmp13], %[stack], 0x20)
1695  "pavgb %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
1696  "pxor %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1697  "pxor %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1698  "pand %[ftmp6], %[ftmp6], %[ftmp12] \n\t"
1699  "pand %[ftmp7], %[ftmp7], %[ftmp13] \n\t"
1700  "pxor %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1701  "pxor %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1702  MMI_SDC1(%[ftmp6], %[pix], 0x00)
1703  MMI_LDXC1(%[ftmp6], %[pix], %[addr1], 0x00)
1704  "paddb %[ftmp7], %[ftmp14], %[ftmp6] \n\t"
1705  "pavgb %[ftmp6], %[ftmp6], %[ftmp14] \n\t"
1706  MMI_LDC1(%[ftmp12], %[stack], 0x00)
1707  "pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1708  "paddb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1709  "paddb %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
1710  "psrlh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
1711  "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1712  "pxor %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1713  "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1714  MMI_LDC1(%[ftmp12], %[stack], 0x40)
1715  "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1716  "pxor %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1717  "pxor %[ftmp6], %[ftmp6], %[ftmp14] \n\t"
1718  "pand %[ftmp5], %[ftmp5], %[ftmp12] \n\t"
1719  "pand %[ftmp6], %[ftmp6], %[ftmp12] \n\t"
1720  "pxor %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1721  "pxor %[ftmp6], %[ftmp6], %[ftmp14] \n\t"
1722  MMI_SDXC1(%[ftmp5], %[pix], %[stride], 0x00)
1723  MMI_SDXC1(%[ftmp6], %[pix], %[addr2], 0x00)
1724  "1: \n\t"
1725  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1726  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1727  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1728  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1729  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1730  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
1731  [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
1732  [ftmp14]"=&f"(ftmp[14]), [ftmp15]"=&f"(ftmp[15]),
1733  [tmp0]"=&r"(tmp[0]),
1736  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
1737  [addr2]"=&r"(addr[2]),
1738  [alpha]"+&r"(alpha), [beta]"+&r"(beta)
1739  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
1740  [stack]"r"(stack), [ff_pb_1]"m"(ff_pb_1)
1741  : "memory"
1742  );
1743 }
1744 
1745 void ff_deblock_v_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
1746  int beta, int8_t *tc0)
1747 {
1748  double ftmp[9];
1749  mips_reg addr[1];
1753 
1754  __asm__ volatile (
1755  "addi %[alpha], %[alpha], -0x01 \n\t"
1756  "addi %[beta], %[beta], -0x01 \n\t"
1757  "or %[addr0], $0, %[pix] \n\t"
1758  PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
1759  PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
1760  MMI_LDC1(%[ftmp1], %[addr0], 0x00)
1761  MMI_LDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1762  MMI_LDC1(%[ftmp3], %[pix], 0x00)
1763  MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1764 
1765  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1766  "mtc1 %[alpha], %[ftmp5] \n\t"
1767  "mtc1 %[beta], %[ftmp6] \n\t"
1768  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1769  "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1770  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1771  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1772  "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t"
1773  "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1774  "por %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1775  "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1776  "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
1777  "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1778  "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1779  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1780  "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1781  "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1782  "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t"
1783  "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1784  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1785  "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1786  "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1787  "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1788  MMI_ULWC1(%[ftmp7], %[tc0], 0x00)
1789  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1790  "pand %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1791  "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1792  "pxor %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1793  "pxor %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1794  "pand %[ftmp6], %[ftmp6], %[ff_pb_1] \n\t"
1795  "pavgb %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
1796  "pxor %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
1797  "pavgb %[ftmp4], %[ftmp4], %[ff_pb_3] \n\t"
1798  "pavgb %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
1799  "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1800  "paddusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1801  "psubusb %[ftmp7], %[ff_pb_A1], %[ftmp4] \n\t"
1802  "psubusb %[ftmp4], %[ftmp4], %[ff_pb_A1] \n\t"
1803  "pminub %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1804  "pminub %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
1805  "psubusb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1806  "psubusb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1807  "paddusb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1808  "paddusb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1809 
1810  MMI_SDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1811  MMI_SDC1(%[ftmp3], %[pix], 0x00)
1812  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1813  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1814  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1815  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1816  [ftmp8]"=&f"(ftmp[8]),
1820  [addr0]"=&r"(addr[0])
1821  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
1822  [alpha]"r"(alpha), [beta]"r"(beta),
1823  [tc0]"r"(tc0), [ff_pb_1]"f"(ff_pb_1.f),
1824  [ff_pb_3]"f"(ff_pb_3.f), [ff_pb_A1]"f"(ff_pb_A1.f)
1825  : "memory"
1826  );
1827 }
1828 
1829 void ff_deblock_v_chroma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
1830  int beta)
1831 {
1832  double ftmp[9];
1833  mips_reg addr[1];
1836 
1837  __asm__ volatile (
1838  "addi %[alpha], %[alpha], -0x01 \n\t"
1839  "addi %[beta], %[beta], -0x01 \n\t"
1840  "or %[addr0], $0, %[pix] \n\t"
1841  PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
1842  PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
1843  MMI_LDC1(%[ftmp1], %[addr0], 0x00)
1844  MMI_LDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1845  MMI_LDC1(%[ftmp3], %[pix], 0x00)
1846  MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1847 
1848  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1849  "mtc1 %[alpha], %[ftmp5] \n\t"
1850  "mtc1 %[beta], %[ftmp6] \n\t"
1851  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1852  "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1853  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1854  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1855  "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t"
1856  "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1857  "por %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1858  "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1859  "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
1860  "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1861  "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1862  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1863  "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1864  "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1865  "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t"
1866  "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1867  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1868  "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1869  "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1870  "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1871  "mov.d %[ftmp6], %[ftmp2] \n\t"
1872  "mov.d %[ftmp7], %[ftmp3] \n\t"
1873  "pxor %[ftmp5], %[ftmp2], %[ftmp4] \n\t"
1874  "pand %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t"
1875  "pavgb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1876  "psubusb %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
1877  "pavgb %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1878  "pxor %[ftmp5], %[ftmp3], %[ftmp1] \n\t"
1879  "pand %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t"
1880  "pavgb %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
1881  "psubusb %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1882  "pavgb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1883  "psubb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1884  "psubb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1885  "pand %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
1886  "pand %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
1887  "paddb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1888  "paddb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1889 
1890  MMI_SDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1891  MMI_SDC1(%[ftmp3], %[pix], 0x00)
1892  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1893  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1894  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1895  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1896  [ftmp8]"=&f"(ftmp[8]),
1899  [addr0]"=&r"(addr[0])
1900  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
1901  [alpha]"r"(alpha), [beta]"r"(beta),
1902  [ff_pb_1]"f"(ff_pb_1.f)
1903  : "memory"
1904  );
1905 }
1906 
1907 void ff_deblock_h_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
1908  int8_t *tc0)
1909 {
1910  double ftmp[11];
1911  mips_reg addr[6];
1913 
1914  __asm__ volatile (
1915  "addi %[alpha], %[alpha], -0x01 \n\t"
1916  "addi %[beta], %[beta], -0x01 \n\t"
1917  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
1918  PTR_ADDI "%[pix], %[pix], -0x02 \n\t"
1919  PTR_ADDU "%[addr1], %[addr0], %[stride] \n\t"
1920  PTR_ADDU "%[addr2], %[addr0], %[addr0] \n\t"
1921  "or %[addr5], $0, %[pix] \n\t"
1922  PTR_ADDU "%[pix], %[pix], %[addr1] \n\t"
1923  MMI_ULWC1(%[ftmp0], %[addr5], 0x00)
1924  PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t"
1925  MMI_ULWC1(%[ftmp2], %[addr3], 0x00)
1926  PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t"
1927  MMI_ULWC1(%[ftmp1], %[addr4], 0x00)
1928  MMI_ULWC1(%[ftmp3], %[pix], 0x00)
1929  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
1930  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1931  PTR_ADDU "%[addr3], %[pix], %[stride] \n\t"
1932  "punpckhhw %[ftmp2], %[ftmp0], %[ftmp1] \n\t"
1933  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1934  MMI_ULWC1(%[ftmp4], %[addr3], 0x00)
1935  PTR_ADDU "%[addr4], %[pix], %[addr0] \n\t"
1936  MMI_ULWC1(%[ftmp6], %[addr4], 0x00)
1937  PTR_ADDU "%[addr3], %[pix], %[addr1] \n\t"
1938  MMI_ULWC1(%[ftmp5], %[addr3], 0x00)
1939  PTR_ADDU "%[addr4], %[pix], %[addr2] \n\t"
1940  MMI_ULWC1(%[ftmp7], %[addr4], 0x00)
1941  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1942  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1943  "mov.d %[ftmp6], %[ftmp4] \n\t"
1944  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1945  "punpckhhw %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1946  "punpckhwd %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
1947  "punpckhwd %[ftmp3], %[ftmp2], %[ftmp6] \n\t"
1948  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
1949  "punpcklwd %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1950  "mov.d %[ftmp9], %[ftmp0] \n\t"
1951  "mov.d %[ftmp10], %[ftmp3] \n\t"
1952 
1953  "pxor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
1954  "mtc1 %[alpha], %[ftmp4] \n\t"
1955  "mtc1 %[beta], %[ftmp5] \n\t"
1956  "pshufh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
1957  "pshufh %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
1958  "packushb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
1959  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1960  "psubusb %[ftmp6], %[ftmp2], %[ftmp1] \n\t"
1961  "psubusb %[ftmp7], %[ftmp1], %[ftmp2] \n\t"
1962  "por %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1963  "psubusb %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
1964  "psubusb %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
1965  "psubusb %[ftmp4], %[ftmp0], %[ftmp1] \n\t"
1966  "por %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1967  "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1968  "por %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
1969  "psubusb %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1970  "psubusb %[ftmp4], %[ftmp3], %[ftmp2] \n\t"
1971  "por %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1972  "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1973  "por %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
1974  "pxor %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1975  "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1976  MMI_ULWC1(%[ftmp6], %[tc0], 0x00)
1977  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1978  "pand %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1979  "pcmpeqb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
1980  "pxor %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1981  "pxor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1982  "pand %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t"
1983  "pavgb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1984  "pxor %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
1985  "pavgb %[ftmp3], %[ftmp3], %[ff_pb_3] \n\t"
1986  "pavgb %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
1987  "pavgb %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1988  "paddusb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1989  "psubusb %[ftmp6], %[ff_pb_A1], %[ftmp3] \n\t"
1990  "psubusb %[ftmp3], %[ftmp3], %[ff_pb_A1] \n\t"
1991  "pminub %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1992  "pminub %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1993  "psubusb %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1994  "psubusb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
1995  "paddusb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1996  "paddusb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1997 
1998  "punpckhwd %[ftmp4], %[ftmp9], %[ftmp9] \n\t"
1999  "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t"
2000  "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t"
2001  "punpcklbh %[ftmp0], %[ftmp9], %[ftmp1] \n\t"
2002  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
2003  "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2004  "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2005  MMI_USWC1(%[ftmp1], %[addr5], 0x00)
2006  PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t"
2007  "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
2008  MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2009  PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t"
2010  MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2011  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2012  "punpckhwd %[ftmp3], %[ftmp10], %[ftmp10] \n\t"
2013  MMI_USWC1(%[ftmp0], %[pix], 0x00)
2014  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2015  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
2016  PTR_ADDU "%[addr3], %[pix], %[stride] \n\t"
2017  "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t"
2018  "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2019  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2020  "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2021  PTR_ADDU "%[addr3], %[pix], %[addr0] \n\t"
2022  PTR_ADDU "%[addr4], %[pix], %[addr1] \n\t"
2023  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2024  MMI_USWC1(%[ftmp4], %[addr4], 0x00)
2025  PTR_ADDU "%[addr3], %[pix], %[addr2] \n\t"
2026  "punpckhwd %[ftmp9], %[ftmp4], %[ftmp4] \n\t"
2027  MMI_USWC1(%[ftmp9], %[addr3], 0x00)
2028  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2029  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2030  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2031  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2032  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
2033  [ftmp10]"=&f"(ftmp[10]),
2035  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2036  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2037  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2038  [pix]"+&r"(pix)
2039  : [alpha]"r"(alpha), [beta]"r"(beta),
2040  [stride]"r"((mips_reg)stride), [tc0]"r"(tc0),
2041  [ff_pb_1]"f"(ff_pb_1.f), [ff_pb_3]"f"(ff_pb_3.f),
2042  [ff_pb_A1]"f"(ff_pb_A1.f)
2043  : "memory"
2044  );
2045 }
2046 
2047 void ff_deblock_h_chroma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
2048  int beta)
2049 {
2050  double ftmp[11];
2051  mips_reg addr[6];
2053 
2054  __asm__ volatile (
2055  "addi %[alpha], %[alpha], -0x01 \n\t"
2056  "addi %[beta], %[beta], -0x01 \n\t"
2057  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2058  PTR_ADDI "%[pix], %[pix], -0x02 \n\t"
2059  PTR_ADDU "%[addr1], %[addr0], %[stride] \n\t"
2060  PTR_ADDU "%[addr2], %[addr0], %[addr0] \n\t"
2061  "or %[addr5], $0, %[pix] \n\t"
2062  PTR_ADDU "%[pix], %[pix], %[addr1] \n\t"
2063  MMI_ULWC1(%[ftmp0], %[addr5], 0x00)
2064  PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t"
2065  MMI_ULWC1(%[ftmp2], %[addr3], 0x00)
2066  PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t"
2067  MMI_ULWC1(%[ftmp1], %[addr4], 0x00)
2068  MMI_ULWC1(%[ftmp3], %[pix], 0x00)
2069  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2070  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
2071  PTR_ADDU "%[addr3], %[pix], %[stride] \n\t"
2072  "punpckhhw %[ftmp2], %[ftmp0], %[ftmp1] \n\t"
2073  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2074  MMI_ULWC1(%[ftmp4], %[addr3], 0x00)
2075  PTR_ADDU "%[addr4], %[pix], %[addr0] \n\t"
2076  MMI_ULWC1(%[ftmp6], %[addr4], 0x00)
2077  PTR_ADDU "%[addr3], %[pix], %[addr1] \n\t"
2078  MMI_ULWC1(%[ftmp5], %[addr3], 0x00)
2079  PTR_ADDU "%[addr4], %[pix], %[addr2] \n\t"
2080  MMI_ULWC1(%[ftmp7], %[addr4], 0x00)
2081  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2082  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
2083  "mov.d %[ftmp6], %[ftmp4] \n\t"
2084  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2085  "punpckhhw %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
2086  "punpckhwd %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
2087  "punpckhwd %[ftmp3], %[ftmp2], %[ftmp6] \n\t"
2088  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2089  "punpcklwd %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2090 
2091  "pxor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
2092  "mtc1 %[alpha], %[ftmp4] \n\t"
2093  "mtc1 %[beta], %[ftmp5] \n\t"
2094  "pshufh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
2095  "pshufh %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
2096  "packushb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2097  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2098  "psubusb %[ftmp6], %[ftmp2], %[ftmp1] \n\t"
2099  "psubusb %[ftmp7], %[ftmp1], %[ftmp2] \n\t"
2100  "por %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
2101  "psubusb %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2102  "psubusb %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
2103  "psubusb %[ftmp4], %[ftmp0], %[ftmp1] \n\t"
2104  "por %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2105  "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2106  "por %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2107  "psubusb %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
2108  "psubusb %[ftmp4], %[ftmp3], %[ftmp2] \n\t"
2109  "por %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2110  "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2111  "por %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2112  "pxor %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
2113  "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
2114  "mov.d %[ftmp5], %[ftmp1] \n\t"
2115  "mov.d %[ftmp6], %[ftmp2] \n\t"
2116  "pxor %[ftmp4], %[ftmp1], %[ftmp3] \n\t"
2117  "pand %[ftmp4], %[ftmp4], %[ff_pb_1] \n\t"
2118  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
2119  "psubusb %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
2120  "pavgb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
2121  "pxor %[ftmp4], %[ftmp2], %[ftmp0] \n\t"
2122  "pand %[ftmp4], %[ftmp4], %[ff_pb_1] \n\t"
2123  "pavgb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
2124  "psubusb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
2125  "pavgb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2126  "psubb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
2127  "psubb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2128  "pand %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
2129  "pand %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
2130  "paddb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
2131  "paddb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2132 
2133  "punpckhwd %[ftmp4], %[ftmp0], %[ftmp0] \n\t"
2134  "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t"
2135  "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t"
2136  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2137  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2138  "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2139  "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2140  MMI_USWC1(%[ftmp1], %[addr5], 0x00)
2141  PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t"
2142  "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
2143  PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t"
2144  MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2145  MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2146  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2147  "punpckhwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
2148  MMI_USWC1(%[ftmp0], %[pix], 0x00)
2149  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2150  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
2151  PTR_ADDU "%[addr3], %[pix], %[stride] \n\t"
2152  "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t"
2153  "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2154  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2155  "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2156  PTR_ADDU "%[addr3], %[pix], %[addr0] \n\t"
2157  PTR_ADDU "%[addr4], %[pix], %[addr1] \n\t"
2158  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2159  PTR_ADDU "%[addr3], %[pix], %[addr2] \n\t"
2160  MMI_USWC1(%[ftmp4], %[addr4], 0x00)
2161  "punpckhwd %[ftmp9], %[ftmp4], %[ftmp4] \n\t"
2162  MMI_USWC1(%[ftmp9], %[addr3], 0x00)
2163  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2164  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2165  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2166  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2167  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
2168  [ftmp10]"=&f"(ftmp[10]),
2170  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2171  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2172  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2173  [pix]"+&r"(pix)
2174  : [alpha]"r"(alpha), [beta]"r"(beta),
2175  [stride]"r"((mips_reg)stride), [ff_pb_1]"f"(ff_pb_1.f)
2176  : "memory"
2177  );
2178 }
2179 
/**
 * Run ff_deblock_v8_luma_8_mmi() on the two 8-byte-wide column groups that
 * start at pix and pix + 8.
 *
 * Each group uses its own pair of tc0 entries (tc0[0..1] for the first,
 * tc0[2..3] for the second) and is skipped when both entries of its pair
 * are negative, i.e. when their bitwise AND is negative.
 *
 * @param pix    passed through to the 8-wide filter (offset by 0 or 8)
 * @param stride passed through unchanged
 * @param alpha  passed through unchanged
 * @param beta   passed through unchanged
 * @param tc0    four entries; see above
 */
void ff_deblock_v_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
        int8_t *tc0)
{
    int half;

    for (half = 0; half < 2; half++) {
        int8_t *tc = tc0 + 2 * half;

        /* sign bit survives the AND only if both entries are negative */
        if ((tc[0] & tc[1]) < 0)
            continue;

        ff_deblock_v8_luma_8_mmi(pix + 8 * half, stride, alpha, beta, tc);
    }
}
2188 
/**
 * Run deblock_v8_luma_intra_8_mmi() twice: once at pix and once at
 * pix + 8, with the remaining arguments passed through unchanged.
 *
 * @param pix    base address; the helper is called at offsets 0 and 8
 * @param stride forwarded to the helper
 * @param alpha  forwarded to the helper
 * @param beta   forwarded to the helper
 */
void ff_deblock_v_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
        int beta)
{
    int x;

    for (x = 0; x < 16; x += 8)
        deblock_v8_luma_intra_8_mmi(pix + x, stride, alpha, beta);
}
2195 
2196 void ff_deblock_h_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
2197  int8_t *tc0)
2198 {
2199  DECLARE_ALIGNED(8, const uint64_t, stack[0x0d]);
2200  double ftmp[9];
2201  mips_reg addr[8];
2204 
2205  __asm__ volatile (
2206  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2207  PTR_ADDI "%[addr1], %[pix], -0x4 \n\t"
2208  PTR_ADDU "%[addr2], %[stride], %[addr0] \n\t"
2209  MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2210  PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t"
2211  PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t"
2212  MMI_ULDC1(%[ftmp1], %[addr3], 0x00)
2213  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2214  MMI_ULDC1(%[ftmp2], %[addr5], 0x00)
2215  MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2216  PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t"
2217  MMI_ULDC1(%[ftmp4], %[addr3], 0x00)
2218  PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t"
2219  MMI_ULDC1(%[ftmp5], %[addr5], 0x00)
2220  PTR_ADDU "%[addr3], %[addr4], %[addr2] \n\t"
2221  MMI_ULDC1(%[ftmp6], %[addr3], 0x00)
2222  PTR_ADDU "%[addr6], %[addr0], %[addr0] \n\t"
2223  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2224  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2225  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2226  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2227  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2228  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2229  PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t"
2230  MMI_SDC1(%[ftmp1], %[stack], 0x10)
2231  MMI_ULDC1(%[ftmp8], %[addr3], 0x00)
2232  PTR_ADDU "%[addr7], %[addr6], %[addr6] \n\t"
2233  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2234  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
2235  "punpckhhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2236  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2237  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2238  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2239  MMI_LDC1(%[ftmp8], %[stack], 0x10)
2240  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2241  MMI_SDC1(%[ftmp0], %[stack], 0x00)
2242  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp8] \n\t"
2243  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
2244  "punpckhhw %[ftmp0], %[ftmp3], %[ftmp5] \n\t"
2245  "punpcklhw %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
2246  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
2247  "punpckhwd %[ftmp5], %[ftmp7], %[ftmp3] \n\t"
2248  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
2249  "punpckhwd %[ftmp3], %[ftmp1], %[ftmp2] \n\t"
2250  "punpcklwd %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
2251  MMI_SDC1(%[ftmp1], %[stack], 0x10)
2252  MMI_SDC1(%[ftmp3], %[stack], 0x20)
2253  MMI_SDC1(%[ftmp7], %[stack], 0x30)
2254  MMI_SDC1(%[ftmp5], %[stack], 0x40)
2255  MMI_SDC1(%[ftmp6], %[stack], 0x50)
2256  PTR_ADDU "%[addr1], %[addr1], %[addr7] \n\t"
2257  PTR_ADDU "%[addr4], %[addr4], %[addr7] \n\t"
2258  MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2259  PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t"
2260  MMI_ULDC1(%[ftmp1], %[addr3], 0x00)
2261  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2262  MMI_ULDC1(%[ftmp2], %[addr5], 0x00)
2263  MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2264  PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t"
2265  MMI_ULDC1(%[ftmp4], %[addr3], 0x00)
2266  PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t"
2267  MMI_ULDC1(%[ftmp5], %[addr5], 0x00)
2268  PTR_ADDU "%[addr3], %[addr4], %[addr2] \n\t"
2269  MMI_ULDC1(%[ftmp6], %[addr3], 0x00)
2270  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2271  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2272  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2273  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2274  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2275  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2276  PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t"
2277  MMI_SDC1(%[ftmp1], %[stack], 0x18)
2278  MMI_ULDC1(%[ftmp8], %[addr3], 0x00)
2279  "punpckhhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2280  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2281  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
2282  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2283  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2284  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2285  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2286  MMI_LDC1(%[ftmp8], %[stack], 0x18)
2287  MMI_SDC1(%[ftmp0], %[stack], 0x08)
2288  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp8] \n\t"
2289  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
2290  "punpckhhw %[ftmp0], %[ftmp3], %[ftmp5] \n\t"
2291  "punpcklhw %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
2292  "punpckhwd %[ftmp5], %[ftmp7], %[ftmp3] \n\t"
2293  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
2294  "punpckhwd %[ftmp3], %[ftmp1], %[ftmp2] \n\t"
2295  "punpcklwd %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
2296  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
2297  MMI_SDC1(%[ftmp1], %[stack], 0x18)
2298  MMI_SDC1(%[ftmp3], %[stack], 0x28)
2299  MMI_SDC1(%[ftmp7], %[stack], 0x38)
2300  MMI_SDC1(%[ftmp5], %[stack], 0x48)
2301  MMI_SDC1(%[ftmp6], %[stack], 0x58)
2302  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2303  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2304  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2305  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2306  [ftmp8]"=&f"(ftmp[8]),
2308  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2309  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2310  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2311  [addr6]"=&r"(addr[6]), [addr7]"=&r"(addr[7])
2312  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
2313  [stack]"r"(stack)
2314  : "memory"
2315  );
2316 
2317  ff_deblock_v_luma_8_mmi((uint8_t *) &stack[6], 0x10, alpha, beta, tc0);
2318 
2319  __asm__ volatile (
2320  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2321  PTR_ADDI "%[addr1], %[pix], -0x02 \n\t"
2322  PTR_ADDU "%[addr6], %[addr0], %[addr0] \n\t"
2323  PTR_ADDU "%[addr2], %[addr0], %[stride] \n\t"
2324  PTR_ADDU "%[addr7], %[addr6], %[addr6] \n\t"
2325  PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t"
2326  MMI_LDC1(%[ftmp0], %[stack], 0x10)
2327  MMI_LDC1(%[ftmp1], %[stack], 0x20)
2328  MMI_LDC1(%[ftmp2], %[stack], 0x30)
2329  MMI_LDC1(%[ftmp3], %[stack], 0x40)
2330  "punpckhwd %[ftmp4], %[ftmp0], %[ftmp0] \n\t"
2331  "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t"
2332  "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t"
2333  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2334  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2335  "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2336  "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2337  MMI_USWC1(%[ftmp1], %[addr1], 0x00)
2338  PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t"
2339  "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
2340  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2341  MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2342  MMI_USWC1(%[ftmp0], %[addr5], 0x00)
2343  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2344  "punpckhwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
2345  MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2346  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2347  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
2348  "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t"
2349  PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t"
2350  "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2351  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2352  PTR_ADDU "%[addr3], %[addr4], %[addr0] \n\t"
2353  "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2354  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2355  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2356  MMI_USWC1(%[ftmp4], %[addr5], 0x00)
2357  PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t"
2358  "punpckhwd %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2359  PTR_ADDU "%[addr1], %[addr1], %[addr7] \n\t"
2360  MMI_USWC1(%[ftmp4], %[addr3], 0x00)
2361  PTR_ADDU "%[addr4], %[addr4], %[addr7] \n\t"
2362  MMI_LDC1(%[ftmp0], %[stack], 0x18)
2363  MMI_LDC1(%[ftmp1], %[stack], 0x28)
2364  MMI_LDC1(%[ftmp2], %[stack], 0x38)
2365  MMI_LDC1(%[ftmp3], %[stack], 0x48)
2366  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2367  "punpckhwd %[ftmp4], %[ftmp0], %[ftmp0] \n\t"
2368  PTR_ADDU "%[addr6], %[addr0], %[addr0] \n\t"
2369  "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t"
2370  "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t"
2371  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2372  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2373  PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t"
2374  "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2375  "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2376  MMI_USWC1(%[ftmp1], %[addr1], 0x00)
2377  "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
2378  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2379  MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2380  MMI_USWC1(%[ftmp0], %[addr5], 0x00)
2381  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2382  "punpckhwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
2383  MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2384  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2385  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
2386  PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t"
2387  "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t"
2388  "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2389  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2390  PTR_ADDU "%[addr3], %[addr4], %[addr0] \n\t"
2391  "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2392  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2393  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2394  MMI_USWC1(%[ftmp4], %[addr5], 0x00)
2395  PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t"
2396  "punpckhwd %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2397  MMI_USWC1(%[ftmp4], %[addr3], 0x00)
2398  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2399  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2400  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2401  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2402  [ftmp8]"=&f"(ftmp[8]),
2405  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2406  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2407  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2408  [addr6]"=&r"(addr[6]), [addr7]"=&r"(addr[7])
2409  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
2410  [stack]"r"(stack)
2411  : "memory"
2412  );
2413 }
2414 
/**
 * Intra luma deblocking across an edge whose samples lie side by side in
 * memory, implemented on top of ff_deblock_v_luma_intra_8_mmi().
 *
 * The routine works in three steps:
 *   1. Read 16 rows starting at pix - 4 (two groups of eight rows) and run
 *      them through a byte/halfword/word interleave (an 8x8 transpose per
 *      group), writing the result into the scratch buffer ptmp laid out with
 *      a 0x10-byte pitch.
 *   2. Call ff_deblock_v_luma_intra_8_mmi() on &ptmp[8] with a stride of
 *      0x10, i.e. on the transposed copy.
 *   3. Interleave the filtered scratch data back and store it over the same
 *      16 rows of the picture.
 *
 * @param pix    pointer into the picture; rows are read and written starting
 *               at pix - 4
 * @param stride distance in bytes between consecutive rows of pix
 * @param alpha  passed through unchanged to ff_deblock_v_luma_intra_8_mmi()
 * @param beta   passed through unchanged to ff_deblock_v_luma_intra_8_mmi()
 *
 * ptmp and pdat are deliberately NOT const: both asm blocks store into them
 * and the filter call writes into ptmp, so a const-qualified definition
 * would make those writes undefined behaviour.
 *
 * NOTE(review): the embedded listing numbers skip 2422, 2542 and 2672, so
 * this extract appears to be missing lines at those positions - confirm
 * against the upstream file before building from it.
 */
2415 void ff_deblock_h_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
2416  int beta)
2417 {
 /* ptmp: transposed pixel scratch (0x11 * 8 bytes), written by the asm. */
2418  DECLARE_ALIGNED(8, uint64_t, ptmp[0x11]);
 /* pdat: carries four address/offset values from the first asm block to
  * the second one across the C function call. */
2419  DECLARE_ALIGNED(8, uint64_t, pdat[0x04]);
2420  double ftmp[9];
2421  mips_reg addr[7];
2423 
2424  __asm__ volatile (
 /* addr0 = 2*stride, addr1 = pix - 4, addr2 = 3*stride,
  * addr3 = 4*stride, addr4 = addr1 + 3*stride */
2425  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2426  PTR_ADDI "%[addr1], %[pix], -0x04 \n\t"
2427  PTR_ADDU "%[addr2], %[addr0], %[stride] \n\t"
2428  PTR_ADDU "%[addr3], %[addr0], %[addr0] \n\t"
2429  PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t"
2430  PTR_ADDU "%[addr5], %[addr1], %[stride] \n\t"
 /* load rows 0..6 of the first group into ftmp0..ftmp6 */
2431  MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2432  PTR_ADDU "%[addr6], %[addr1], %[addr0] \n\t"
2433  MMI_ULDC1(%[ftmp1], %[addr5], 0x00)
2434  MMI_ULDC1(%[ftmp2], %[addr6], 0x00)
2435  PTR_ADDU "%[addr5], %[addr4], %[stride] \n\t"
2436  MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2437  PTR_ADDU "%[addr6], %[addr4], %[addr0] \n\t"
2438  MMI_ULDC1(%[ftmp4], %[addr5], 0x00)
2439  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2440  MMI_ULDC1(%[ftmp5], %[addr6], 0x00)
2441  MMI_ULDC1(%[ftmp6], %[addr5], 0x00)
2442  PTR_ADDU "%[addr5], %[addr4], %[addr3] \n\t"
 /* stage 1: interleave bytes of row pairs */
2443  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2444  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2445  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2446  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2447  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2448  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
 /* row 7 of the group (addr4 + 4*stride) */
2449  MMI_ULDC1(%[ftmp8], %[addr5], 0x00)
2450  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2451  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
 /* ptmp+0x00 is used as a temporary spill slot here; it is reloaded
  * at 2460 and overwritten with a final value at 2467 */
2452  MMI_SDC1(%[ftmp3], %[ptmp], 0x00)
 /* stage 2: interleave halfwords */
2453  "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
2454  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2455  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2456  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2457  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
2458  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
 /* second spill slot (ptmp+0x20), reloaded at 2471 */
2459  MMI_SDC1(%[ftmp2], %[ptmp], 0x20)
2460  MMI_LDC1(%[ftmp2], %[ptmp], 0x00)
2461  "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
2462  "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
 /* stage 3: interleave words, then store the results of the first
  * group at ptmp offsets 0x00, 0x10, ..., 0x70 */
2463  "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
2464  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2465  "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
2466  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
2467  MMI_SDC1(%[ftmp0], %[ptmp], 0x00)
2468  MMI_SDC1(%[ftmp5], %[ptmp], 0x10)
2469  MMI_SDC1(%[ftmp7], %[ptmp], 0x40)
2470  MMI_SDC1(%[ftmp4], %[ptmp], 0x50)
2471  MMI_LDC1(%[ftmp8], %[ptmp], 0x20)
2472  "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
2473  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2474  "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
2475  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
 /* addr5 = 8*stride: advance addr1/addr4 to the second group of rows */
2476  PTR_ADDU "%[addr5], %[addr3], %[addr3] \n\t"
2477  MMI_SDC1(%[ftmp3], %[ptmp], 0x20)
2478  MMI_SDC1(%[ftmp0], %[ptmp], 0x30)
2479  MMI_SDC1(%[ftmp6], %[ptmp], 0x60)
2480  MMI_SDC1(%[ftmp5], %[ptmp], 0x70)
2481  PTR_ADDU "%[addr1], %[addr1], %[addr5] \n\t"
2482  PTR_ADDU "%[addr4], %[addr4], %[addr5] \n\t"
 /* second group: same load/interleave sequence as above, with spill
  * slots and results shifted by 8 bytes (ptmp offsets 0x08 ... 0x78) */
2483  PTR_ADDU "%[addr5], %[addr1], %[stride] \n\t"
2484  MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2485  PTR_ADDU "%[addr6], %[addr1], %[addr0] \n\t"
2486  MMI_ULDC1(%[ftmp1], %[addr5], 0x00)
2487  MMI_ULDC1(%[ftmp2], %[addr6], 0x00)
2488  PTR_ADDU "%[addr5], %[addr4], %[stride] \n\t"
2489  MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2490  PTR_ADDU "%[addr6], %[addr4], %[addr0] \n\t"
2491  MMI_ULDC1(%[ftmp4], %[addr5], 0x00)
2492  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2493  MMI_ULDC1(%[ftmp5], %[addr6], 0x00)
2494  MMI_ULDC1(%[ftmp6], %[addr5], 0x00)
2495  PTR_ADDU "%[addr5], %[addr4], %[addr3] \n\t"
2496  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2497  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2498  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2499  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2500  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2501  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2502  MMI_ULDC1(%[ftmp8], %[addr5], 0x00)
2503  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2504  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
2505  MMI_SDC1(%[ftmp3], %[ptmp], 0x08)
2506  "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
2507  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2508  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2509  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2510  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
2511  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
2512  MMI_SDC1(%[ftmp2], %[ptmp], 0x28)
2513  MMI_LDC1(%[ftmp2], %[ptmp], 0x08)
2514  "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
2515  "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2516  "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
2517  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2518  "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
2519  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
2520  MMI_SDC1(%[ftmp0], %[ptmp], 0x08)
2521  MMI_SDC1(%[ftmp5], %[ptmp], 0x18)
2522  MMI_SDC1(%[ftmp7], %[ptmp], 0x48)
2523  MMI_SDC1(%[ftmp4], %[ptmp], 0x58)
2524  MMI_LDC1(%[ftmp8], %[ptmp], 0x28)
2525  "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
2526  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2527  "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
2528  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
2529  MMI_SDC1(%[ftmp3], %[ptmp], 0x28)
2530  MMI_SDC1(%[ftmp0], %[ptmp], 0x38)
2531  MMI_SDC1(%[ftmp6], %[ptmp], 0x68)
2532  MMI_SDC1(%[ftmp5], %[ptmp], 0x78)
 /* save addr1 (start of the second row group), 3*stride, 2*stride
  * and 4*stride so the second asm block can reload them */
2533  PTR_S "%[addr1], 0x00(%[pdat]) \n\t"
2534  PTR_S "%[addr2], 0x08(%[pdat]) \n\t"
2535  PTR_S "%[addr0], 0x10(%[pdat]) \n\t"
2536  PTR_S "%[addr3], 0x18(%[pdat]) \n\t"
2537  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2538  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2539  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2540  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2541  [ftmp8]"=&f"(ftmp[8]),
2543  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2544  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2545  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2546  [addr6]"=&r"(addr[6])
2547  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
2548  [ptmp]"r"(ptmp), [pdat]"r"(pdat)
2549  : "memory"
2550  );
2551 
 /* Filter the transposed copy in place: &ptmp[8] is 0x40 bytes (four
  * 0x10-byte scratch rows) into the buffer, and the pitch is 0x10. */
2552  ff_deblock_v_luma_intra_8_mmi((uint8_t *) &ptmp[8], 0x10, alpha, beta);
2553 
2554  __asm__ volatile (
 /* restore addr1 / 3*stride / 2*stride / 4*stride saved above */
2555  PTR_L "%[addr1], 0x00(%[pdat]) \n\t"
2556  PTR_L "%[addr2], 0x08(%[pdat]) \n\t"
2557  PTR_L "%[addr0], 0x10(%[pdat]) \n\t"
2558  PTR_L "%[addr3], 0x18(%[pdat]) \n\t"
2559  PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t"
 /* second row group first: read ptmp offsets 0x08, 0x18, ..., 0x78 */
2560  MMI_LDC1(%[ftmp0], %[ptmp], 0x08)
2561  MMI_LDC1(%[ftmp1], %[ptmp], 0x18)
2562  MMI_LDC1(%[ftmp2], %[ptmp], 0x28)
2563  MMI_LDC1(%[ftmp3], %[ptmp], 0x38)
2564  MMI_LDC1(%[ftmp4], %[ptmp], 0x48)
2565  MMI_LDC1(%[ftmp5], %[ptmp], 0x58)
2566  MMI_LDC1(%[ftmp6], %[ptmp], 0x68)
2567  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2568  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2569  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2570  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2571  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2572  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2573  MMI_LDC1(%[ftmp8], %[ptmp], 0x78)
2574  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2575  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
 /* the destination row at addr1 doubles as a spill slot here; it is
  * read back at 2585 and receives its final value at 2593 */
2576  MMI_USDC1(%[ftmp3], %[addr1], 0x00)
2577  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2578  "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
2579  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2580  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2581  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2582  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
2583  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
 /* second spill into the picture (addr1 + 2*stride), read back at
  * 2600 and finally overwritten at 2606 */
2584  MMI_USDC1(%[ftmp2], %[addr5], 0x00)
2585  MMI_ULDC1(%[ftmp2], %[addr1], 0x00)
2586  "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
2587  "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2588  "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
2589  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2590  "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
2591  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
 /* write the eight rows of this group back to the picture */
2592  PTR_ADDU "%[addr5], %[addr1], %[stride] \n\t"
2593  MMI_USDC1(%[ftmp0], %[addr1], 0x00)
2594  PTR_ADDU "%[addr6], %[addr4], %[stride] \n\t"
2595  MMI_USDC1(%[ftmp5], %[addr5], 0x00)
2596  PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t"
2597  MMI_USDC1(%[ftmp7], %[addr6], 0x00)
2598  PTR_ADDU "%[addr6], %[addr1], %[addr0] \n\t"
2599  MMI_USDC1(%[ftmp4], %[addr5], 0x00)
2600  MMI_ULDC1(%[ftmp8], %[addr6], 0x00)
2601  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2602  "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
2603  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2604  "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
2605  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
2606  MMI_USDC1(%[ftmp3], %[addr5], 0x00)
2607  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2608  MMI_USDC1(%[ftmp0], %[addr4], 0x00)
2609  PTR_ADDU "%[addr6], %[addr4], %[addr3] \n\t"
2610  MMI_USDC1(%[ftmp6], %[addr5], 0x00)
 /* addr5 = 8*stride: step addr1/addr4 back to the first row group */
2611  PTR_ADDU "%[addr5], %[addr3], %[addr3] \n\t"
2612  MMI_USDC1(%[ftmp5], %[addr6], 0x00)
2613  PTR_SUBU "%[addr1], %[addr1], %[addr5] \n\t"
2614  PTR_SUBU "%[addr4], %[addr4], %[addr5] \n\t"
 /* first row group: same sequence, reading ptmp offsets 0x00 ... 0x70 */
2615  MMI_LDC1(%[ftmp0], %[ptmp], 0x00)
2616  MMI_LDC1(%[ftmp1], %[ptmp], 0x10)
2617  MMI_LDC1(%[ftmp2], %[ptmp], 0x20)
2618  MMI_LDC1(%[ftmp3], %[ptmp], 0x30)
2619  MMI_LDC1(%[ftmp4], %[ptmp], 0x40)
2620  MMI_LDC1(%[ftmp5], %[ptmp], 0x50)
2621  MMI_LDC1(%[ftmp6], %[ptmp], 0x60)
2622  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2623  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2624  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2625  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2626  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2627  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2628  MMI_LDC1(%[ftmp8], %[ptmp], 0x70)
2629  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2630  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
2631  MMI_USDC1(%[ftmp3], %[addr1], 0x00)
2632  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2633  "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
2634  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2635  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2636  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2637  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
2638  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
2639  MMI_USDC1(%[ftmp2], %[addr5], 0x00)
2640  MMI_ULDC1(%[ftmp2], %[addr1], 0x00)
2641  "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
2642  "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2643  "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
2644  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2645  "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
2646  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
2647  PTR_ADDU "%[addr5], %[addr1], %[stride] \n\t"
2648  MMI_USDC1(%[ftmp0], %[addr1], 0x00)
2649  PTR_ADDU "%[addr6], %[addr4], %[stride] \n\t"
2650  MMI_USDC1(%[ftmp5], %[addr5], 0x00)
2651  PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t"
2652  MMI_USDC1(%[ftmp7], %[addr6], 0x00)
2653  PTR_ADDU "%[addr6], %[addr1], %[addr0] \n\t"
2654  MMI_USDC1(%[ftmp4], %[addr5], 0x00)
2655  MMI_ULDC1(%[ftmp8], %[addr6], 0x00)
2656  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2657  "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
2658  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2659  "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
2660  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
2661  MMI_USDC1(%[ftmp3], %[addr5], 0x00)
2662  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2663  MMI_USDC1(%[ftmp0], %[addr4], 0x00)
2664  PTR_ADDU "%[addr6], %[addr4], %[addr3] \n\t"
2665  MMI_USDC1(%[ftmp6], %[addr5], 0x00)
2666  MMI_USDC1(%[ftmp5], %[addr6], 0x00)
2667  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2668  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2669  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2670  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2671  [ftmp8]"=&f"(ftmp[8]),
2673  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2674  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2675  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2676  [addr6]"=&r"(addr[6])
2677  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
2678  [ptmp]"r"(ptmp), [pdat]"r"(pdat)
2679  : "memory"
2680  );
2681 }
ff_deblock_h_chroma_8_mmi
void ff_deblock_h_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_mmi.c:1907
ff_h264_idct8_add_8_mmi
void ff_h264_idct8_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp_mmi.c:171
mem_internal.h
deblock_v8_luma_intra_8_mmi
static void deblock_v8_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
Definition: h264dsp_mmi.c:1519
PTR_SLL
#define PTR_SLL
Definition: asmdefs.h:57
src1
const pixel * src1
Definition: h264pred_template.c:421
output
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
Definition: filter_design.txt:225
DECLARE_VAR_LOW32
#define DECLARE_VAR_LOW32
Definition: mmiutils.h:37
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
ff_h264_biweight_pixels4_8_mmi
void ff_h264_biweight_pixels4_8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, int log2_denom, int weightd, int weights, int offset)
Definition: h264dsp_mmi.c:1347
ff_pb_1
const union av_intfloat64 ff_pb_1
Definition: constants.c:58
ff_h264_idct_add16_8_mmi
void ff_h264_idct_add16_8_mmi(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[5 *8])
Definition: h264dsp_mmi.c:768
mips_reg
#define mips_reg
Definition: asmdefs.h:46
ff_h264_add_pixels4_8_mmi
void ff_h264_add_pixels4_8_mmi(uint8_t *dst, int16_t *src, int stride)
Definition: h264dsp_mmi.c:31
ff_pw_1
const union av_intfloat64 ff_pw_1
Definition: constants.c:25
ff_h264_weight_pixels8_8_mmi
void ff_h264_weight_pixels8_8_mmi(uint8_t *block, ptrdiff_t stride, int height, int log2_denom, int weight, int offset)
Definition: h264dsp_mmi.c:1214
PTR_ADDI
#define PTR_ADDI
Definition: asmdefs.h:51
weight
const h264_weight_func weight
Definition: h264dsp_init.c:33
ff_pb_A1
const union av_intfloat64 ff_pb_A1
Definition: constants.c:61
ff_h264_biweight_pixels16_8_mmi
void ff_h264_biweight_pixels16_8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, int log2_denom, int weightd, int weights, int offset)
Definition: h264dsp_mmi.c:1143
scan8
static const uint8_t scan8[16 *3+3]
Definition: h264_parse.h:40
mmiutils.h
ff_pw_32
const union av_intfloat64 ff_pw_32
Definition: constants.c:42
ff_deblock_v_chroma_intra_8_mmi
void ff_deblock_v_chroma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
Definition: h264dsp_mmi.c:1829
h264dsp_mips.h
ff_h264_idct8_add4_8_mmi
void ff_h264_idct8_add4_8_mmi(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[5 *8])
Definition: h264dsp_mmi.c:799
bit_depth_template.c
ff_h264_idct_add8_8_mmi
void ff_h264_idct_add8_8_mmi(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp_mmi.c:816
dc
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff) *mv_scale Intra DC Prediction block[y][x] dc[1]
Definition: snow.txt:400
height
#define height
Definition: dsp.h:85
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem_internal.h:102
ff_h264_biweight_pixels8_8_mmi
void ff_h264_biweight_pixels8_8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, int log2_denom, int weightd, int weights, int offset)
Definition: h264dsp_mmi.c:1257
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:83
ff_deblock_v_luma_8_mmi
void ff_deblock_v_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_mmi.c:2180
ff_deblock_h_chroma_intra_8_mmi
void ff_deblock_h_chroma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
Definition: h264dsp_mmi.c:2047
block1
static int16_t block1[64]
Definition: dct.c:120
PTR_SUBU
#define PTR_SUBU
Definition: asmdefs.h:52
DECLARE_VAR_ALL64
#define DECLARE_VAR_ALL64
Definition: mmiutils.h:39
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
input
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
Definition: filter_design.txt:172
ff_h264_idct_add16intra_8_mmi
void ff_h264_idct_add16intra_8_mmi(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[5 *8])
Definition: h264dsp_mmi.c:786
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
ff_h264_idct_add_8_mmi
void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp_mmi.c:79
ff_h264_idct_add8_422_8_mmi
void ff_h264_idct_add8_422_8_mmi(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp_mmi.c:832
weights
static const int weights[]
Definition: hevc_pel.c:32
ff_deblock_v_luma_intra_8_mmi
void ff_deblock_v_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
Definition: h264dsp_mmi.c:2189
ff_deblock_v_chroma_8_mmi
void ff_deblock_v_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_mmi.c:1745
av_intfloat64::f
double f
Definition: intfloat.h:34
stride
#define stride
Definition: h264pred_template.c:537
__asm__
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
ff_h264_weight_pixels4_8_mmi
void ff_h264_weight_pixels4_8_mmi(uint8_t *block, ptrdiff_t stride, int height, int log2_denom, int weight, int offset)
Definition: h264dsp_mmi.c:1309
PTR_ADDU
#define PTR_ADDU
Definition: asmdefs.h:49
RESTRICT_ASM_LOW32
#define RESTRICT_ASM_LOW32
Definition: mmiutils.h:38
DECLARE_VAR_ADDRT
#define DECLARE_VAR_ADDRT
Definition: mmiutils.h:41
PTR_S
#define PTR_S
Definition: asmdefs.h:54
PTR_ADDIU
#define PTR_ADDIU
Definition: asmdefs.h:50
src0
const pixel *const src0
Definition: h264pred_template.c:420
ff_pb_3
const union av_intfloat64 ff_pb_3
Definition: constants.c:59
PTR_L
#define PTR_L
Definition: asmdefs.h:53
PTR_SRL
#define PTR_SRL
Definition: asmdefs.h:56
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
ff_h264_weight_pixels16_8_mmi
void ff_h264_weight_pixels16_8_mmi(uint8_t *block, ptrdiff_t stride, int height, int log2_denom, int weight, int offset)
Definition: h264dsp_mmi.c:1087
ff_h264_idct8_dc_add_8_mmi
void ff_h264_idct8_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp_mmi.c:683
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
ff_deblock_h_luma_intra_8_mmi
void ff_deblock_h_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
Definition: h264dsp_mmi.c:2415
ff_deblock_v8_luma_8_mmi
void ff_deblock_v8_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_mmi.c:1391
RESTRICT_ASM_ADDRT
#define RESTRICT_ASM_ADDRT
Definition: mmiutils.h:42
RESTRICT_ASM_ALL64
#define RESTRICT_ASM_ALL64
Definition: mmiutils.h:40
ff_deblock_h_luma_8_mmi
void ff_deblock_h_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_mmi.c:2196
src
#define src
Definition: vp8dsp.c:248
ff_h264_idct_dc_add_8_mmi
void ff_h264_idct_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp_mmi.c:639
ff_h264_luma_dc_dequant_idct_8_mmi
void ff_h264_luma_dc_dequant_idct_8_mmi(int16_t *output, int16_t *input, int qmul)
Definition: h264dsp_mmi.c:860