FFmpeg
h264dsp_mmi.c
Go to the documentation of this file.
1 /*
2  * Loongson SIMD optimized h264dsp
3  *
4  * Copyright (c) 2015 Loongson Technology Corporation Limited
5  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  * Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
7  * Heiher <r@hev.cc>
8  *
9  * This file is part of FFmpeg.
10  *
11  * FFmpeg is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * FFmpeg is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with FFmpeg; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25 
27 #include "h264dsp_mips.h"
29 #include "libavutil/mem_internal.h"
30 
/**
 * Add a 4x4 block of 16-bit values to a 4x4 block of 8-bit pixels and
 * clear the 16-bit source block.
 *
 * @param dst    top-left byte of the destination; four bytes are read and
 *               written on each of the rows dst, dst+stride, dst+2*stride
 *               and dst+3*stride
 * @param src    32 bytes of int16_t values (four 8-byte rows); overwritten
 *               with zeros by this function
 * @param stride byte distance between consecutive destination rows
 *
 * NOTE(review): the 16-byte "gssqc1" stores presumably require src to be
 * 16-byte aligned -- confirm against the callers.
 */
31 void ff_h264_add_pixels4_8_mmi(uint8_t *dst, int16_t *src, int stride)
32 {
33  double ftmp[9];
34  DECLARE_VAR_LOW32;
35 
36  __asm__ volatile (
 /* ftmp0 = 0; used both as the zero source for clearing src and as the
  * second operand of the unpack/pack instructions below. */
37  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
 /* Load the four 8-byte rows of src before they are cleared. */
38  MMI_LDC1(%[ftmp1], %[src], 0x00)
39  MMI_LDC1(%[ftmp2], %[src], 0x08)
40  MMI_LDC1(%[ftmp3], %[src], 0x10)
41  MMI_LDC1(%[ftmp4], %[src], 0x18)
42  /* memset(src, 0, 32); */
43  "gssqc1 %[ftmp0], %[ftmp0], 0x00(%[src]) \n\t"
44  "gssqc1 %[ftmp0], %[ftmp0], 0x10(%[src]) \n\t"
 /* Fetch four bytes from each destination row. */
45  MMI_ULWC1(%[ftmp5], %[dst0], 0x00)
46  MMI_ULWC1(%[ftmp6], %[dst1], 0x00)
47  MMI_ULWC1(%[ftmp7], %[dst2], 0x00)
48  MMI_ULWC1(%[ftmp8], %[dst3], 0x00)
 /* Interleave the pixel bytes with zero bytes so each pixel occupies a
  * 16-bit lane. */
49  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
50  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
51  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
52  "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
 /* 16-bit lane-wise add: src row + widened dst row. */
53  "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
54  "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
55  "paddh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
56  "paddh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
 /* Narrow the sums back to bytes (packushb: presumably with unsigned
  * saturation to 0..255). */
57  "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
58  "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
59  "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
60  "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
 /* Write four result bytes back to each destination row. */
61  MMI_SWC1(%[ftmp1], %[dst0], 0x00)
62  MMI_SWC1(%[ftmp2], %[dst1], 0x00)
63  MMI_SWC1(%[ftmp3], %[dst2], 0x00)
64  MMI_SWC1(%[ftmp4], %[dst3], 0x00)
65  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
66  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
67  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
68  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
69  RESTRICT_ASM_LOW32
70  [ftmp8]"=&f"(ftmp[8])
 /* The four row addresses are computed in C and passed in as inputs. */
71  : [dst0]"r"(dst), [dst1]"r"(dst+stride),
72  [dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride),
73  [src]"r"(src)
74  : "memory"
75  );
76 
77 }
78 
79 void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
80 {
81  double ftmp[12];
82  uint64_t tmp[1];
83  DECLARE_VAR_LOW32;
84  DECLARE_VAR_ADDRT;
85 
86  __asm__ volatile (
87  MMI_LDC1(%[ftmp0], %[block], 0x00)
88  MMI_LDC1(%[ftmp1], %[block], 0x08)
89  MMI_LDC1(%[ftmp2], %[block], 0x10)
90  MMI_LDC1(%[ftmp3], %[block], 0x18)
91  /* memset(block, 0, 32) */
92  "xor %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
93  "gssqc1 %[ftmp4], %[ftmp4], 0x00(%[block]) \n\t"
94  "gssqc1 %[ftmp4], %[ftmp4], 0x10(%[block]) \n\t"
95  "dli %[tmp0], 0x01 \n\t"
96  "mtc1 %[tmp0], %[ftmp8] \n\t"
97  "dli %[tmp0], 0x06 \n\t"
98  "mtc1 %[tmp0], %[ftmp9] \n\t"
99  "psrah %[ftmp4], %[ftmp1], %[ftmp8] \n\t"
100  "psrah %[ftmp5], %[ftmp3], %[ftmp8] \n\t"
101  "psubh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
102  "paddh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
103  "paddh %[ftmp10], %[ftmp2], %[ftmp0] \n\t"
104  "psubh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
105  "paddh %[ftmp11], %[ftmp5], %[ftmp10] \n\t"
106  "psubh %[ftmp2], %[ftmp10], %[ftmp5] \n\t"
107  "paddh %[ftmp10], %[ftmp4], %[ftmp0] \n\t"
108  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
109  "punpckhhw %[ftmp1], %[ftmp11], %[ftmp10] \n\t"
110  "punpcklhw %[ftmp5], %[ftmp11], %[ftmp10] \n\t"
111  "punpckhhw %[ftmp4], %[ftmp0], %[ftmp2] \n\t"
112  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
113  "punpckhwd %[ftmp2], %[ftmp5], %[ftmp0] \n\t"
114  "punpcklwd %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
115  "punpcklwd %[ftmp10], %[ftmp1], %[ftmp4] \n\t"
116  "punpckhwd %[ftmp0], %[ftmp1], %[ftmp4] \n\t"
117  "paddh %[ftmp5], %[ftmp5], %[ff_pw_32] \n\t"
118  "psrah %[ftmp4], %[ftmp2], %[ftmp8] \n\t"
119  "psrah %[ftmp3], %[ftmp0], %[ftmp8] \n\t"
120  "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
121  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
122  "paddh %[ftmp1], %[ftmp10], %[ftmp5] \n\t"
123  "psubh %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
124  "paddh %[ftmp10], %[ftmp3], %[ftmp1] \n\t"
125  "psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
126  "paddh %[ftmp11], %[ftmp4], %[ftmp5] \n\t"
127  "psubh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
128  MMI_ULWC1(%[ftmp2], %[dst], 0x00)
129  MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
130  "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
131  "psrah %[ftmp3], %[ftmp10], %[ftmp9] \n\t"
132  "psrah %[ftmp4], %[ftmp11], %[ftmp9] \n\t"
133  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
134  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
135  "paddh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
136  "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
137  "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
138  "packushb %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
139  MMI_SWC1(%[ftmp2], %[dst], 0x00)
140  MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
141  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
142  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
143  MMI_ULWC1(%[ftmp2], %[dst], 0x00)
144  "psrah %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
145  MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
146  "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
147  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
148  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
149  "paddh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
150  "paddh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
151  "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
152  MMI_SWC1(%[ftmp2], %[dst], 0x00)
153  "packushb %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
154  MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
155  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
156  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
157  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
158  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
159  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
160  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
161  RESTRICT_ASM_LOW32
162  RESTRICT_ASM_ADDRT
163  [tmp0]"=&r"(tmp[0])
164  : [dst]"r"(dst), [block]"r"(block),
165  [stride]"r"((mips_reg)stride), [ff_pw_32]"f"(ff_pw_32)
166  : "memory"
167  );
168 
169 }
170 
/**
 * 8x8 inverse transform of a coefficient block, added to 8-bit pixels.
 *
 * The 128-byte block is read as eight 16-byte rows. Each row is processed
 * as two 8-byte halves. A first pass runs over both halves and keeps its
 * interleaved results in a 32-byte scratch area below $sp, in GPRs
 * tmp1..tmp6 and in ftmp10..ftmp15. A second pass then produces the values
 * that are shifted right by 6 and added to the pixels: first the four
 * bytes per row starting at dst, then the four bytes per row starting at
 * dst + 4. The coefficient block is overwritten with zeros.
 *
 * @param dst    top-left byte of the 8x8 destination area
 * @param block  64 coefficients (128 bytes); zeroed by this function
 * @param stride byte distance between consecutive destination rows
 *
 * NOTE(review): the template moves $sp itself and uses the 32 bytes it
 * reserves as scratch without telling the compiler -- confirm this is safe
 * for every supported ABI/compiler.
 * NOTE(review): dst is listed as an input-only operand ("r") but is
 * modified by the PTR_ADDU lines below; GCC documents that input-only
 * operands must not be modified. A read-write operand would be the strict
 * fix, but it would raise the operand count -- verify before changing.
 */
171 void ff_h264_idct8_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
172 {
173  double ftmp[16];
174  uint64_t tmp[7];
175  mips_reg addr[1];
176  DECLARE_VAR_LOW32;
177  DECLARE_VAR_ADDRT;
178 
179  __asm__ volatile (
 /* block[0] += 0x20, written back in place (presumably the rounding term
  * for the final shift by 6); interleaved with reserving 0x20 bytes of
  * scratch below $sp and setting ftmp8 = 1 (shift count). */
180  "lhu %[tmp0], 0x00(%[block]) \n\t"
181  PTR_ADDI "$sp, $sp, -0x20 \n\t"
182  PTR_ADDIU "%[tmp0], %[tmp0], 0x20 \n\t"
 /* First pass, first 8 bytes of the rows at 0x10/0x20/0x30/0x50/0x60/0x70. */
183  MMI_LDC1(%[ftmp1], %[block], 0x10)
184  "sh %[tmp0], 0x00(%[block]) \n\t"
185  MMI_LDC1(%[ftmp2], %[block], 0x20)
186  "dli %[tmp0], 0x01 \n\t"
187  MMI_LDC1(%[ftmp3], %[block], 0x30)
188  "mtc1 %[tmp0], %[ftmp8] \n\t"
189  MMI_LDC1(%[ftmp5], %[block], 0x50)
190  MMI_LDC1(%[ftmp6], %[block], 0x60)
191  MMI_LDC1(%[ftmp7], %[block], 0x70)
192  "mov.d %[ftmp0], %[ftmp1] \n\t"
193  "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
194  "psrah %[ftmp4], %[ftmp5], %[ftmp8] \n\t"
195  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
196  "paddh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
197  "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
198  "paddh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
199  "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
200  "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
201  "psubh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
202  "psubh %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
203  "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
204  "paddh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
205  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
206  "psrah %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
207  "psubh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
 /* ftmp9 = 2 (second shift count), loaded between the arithmetic. */
208  "dli %[tmp0], 0x02 \n\t"
209  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
210  "mtc1 %[tmp0], %[ftmp9] \n\t"
211  "mov.d %[ftmp7], %[ftmp1] \n\t"
212  "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
213  "psrah %[ftmp3], %[ftmp4], %[ftmp9] \n\t"
214  "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
215  "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
216  "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
217  "psrah %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
218  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
219  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
220  "mov.d %[ftmp5], %[ftmp6] \n\t"
221  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
222  "psrah %[ftmp4], %[ftmp2], %[ftmp8] \n\t"
223  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
224  "psubh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
 /* Rows at 0x00 and 0x40 (first 8 bytes) join the computation here. */
225  MMI_LDC1(%[ftmp2], %[block], 0x00)
226  MMI_LDC1(%[ftmp5], %[block], 0x40)
227  "paddh %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
228  "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
229  "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
230  "psubh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
231  "paddh %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
232  "paddh %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
233  "psubh %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
234  "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
235  "paddh %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
236  "psubh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
237  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
238  "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
239  "psubh %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
240  "paddh %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
241  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
242  "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
243  "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
244  "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
245  "psubh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
246  "paddh %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
 /* ftmp6 is parked in block[0..3] to free a register; it is reloaded into
  * ftmp0 a few lines below. */
247  MMI_SDC1(%[ftmp6], %[block], 0x00)
248  "psubh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
 /* Interleave halfwords, then words (presumably 4x4 transposes). The
  * results are kept in the $sp scratch area and in GPRs tmp1..tmp4. */
249  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp0] \n\t"
250  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
251  "punpckhhw %[ftmp0], %[ftmp3], %[ftmp1] \n\t"
252  "punpcklhw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
253  "punpckhwd %[ftmp1], %[ftmp7], %[ftmp3] \n\t"
254  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
255  "punpckhwd %[ftmp3], %[ftmp6], %[ftmp0] \n\t"
256  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
257  MMI_LDC1(%[ftmp0], %[block], 0x00)
258  MMI_SDC1(%[ftmp7], $sp, 0x00)
259  MMI_SDC1(%[ftmp1], $sp, 0x10)
260  "dmfc1 %[tmp1], %[ftmp6] \n\t"
261  "dmfc1 %[tmp3], %[ftmp3] \n\t"
262  "punpckhhw %[ftmp3], %[ftmp5], %[ftmp2] \n\t"
263  "punpcklhw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
264  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp0] \n\t"
265  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
266  "punpckhwd %[ftmp0], %[ftmp5], %[ftmp4] \n\t"
267  "punpcklwd %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
268  "punpckhwd %[ftmp4], %[ftmp3], %[ftmp2] \n\t"
269  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
270  MMI_SDC1(%[ftmp5], $sp, 0x08)
271  MMI_SDC1(%[ftmp0], $sp, 0x18)
272  "dmfc1 %[tmp2], %[ftmp3] \n\t"
273  "dmfc1 %[tmp4], %[ftmp4] \n\t"
 /* First pass again, now on the second 8 bytes of each row
  * (offsets 0x18/0x28/0x38/0x58/0x68/0x78, later 0x08 and 0x48). */
274  MMI_LDC1(%[ftmp1], %[block], 0x18)
275  MMI_LDC1(%[ftmp6], %[block], 0x28)
276  MMI_LDC1(%[ftmp2], %[block], 0x38)
277  MMI_LDC1(%[ftmp0], %[block], 0x58)
278  MMI_LDC1(%[ftmp3], %[block], 0x68)
279  MMI_LDC1(%[ftmp4], %[block], 0x78)
280  "mov.d %[ftmp7], %[ftmp1] \n\t"
281  "psrah %[ftmp5], %[ftmp0], %[ftmp8] \n\t"
282  "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
283  "paddh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
284  "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
285  "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
286  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
287  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
288  "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
289  "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
290  "psubh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
291  "psrah %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
292  "paddh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
293  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
294  "psrah %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
295  "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
296  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
297  "mov.d %[ftmp4], %[ftmp1] \n\t"
298  "psrah %[ftmp2], %[ftmp5], %[ftmp9] \n\t"
299  "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
300  "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
301  "psrah %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
302  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
303  "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
304  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
305  "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
306  "mov.d %[ftmp0], %[ftmp3] \n\t"
307  "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
308  "psrah %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
309  "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
310  "psubh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
311  MMI_LDC1(%[ftmp6], %[block], 0x08)
312  MMI_LDC1(%[ftmp0], %[block], 0x48)
313  "paddh %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
314  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
315  "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
316  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
317  "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
318  "paddh %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
319  "psubh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
320  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
321  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
322  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
323  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
324  "paddh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
325  "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
326  "paddh %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
327  "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
328  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
329  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
330  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
331  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
332  "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
 /* ftmp3 is parked at block+0x08 and reloaded into ftmp7 below. */
333  MMI_SDC1(%[ftmp3], %[block], 0x08)
334  "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
 /* Interleave as before; the results are kept in tmp5/tmp6 and
  * ftmp10..ftmp15 for the second pass. */
335  "punpckhhw %[ftmp3], %[ftmp4], %[ftmp7] \n\t"
336  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
337  "punpckhhw %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
338  "punpcklhw %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
339  "punpckhwd %[ftmp1], %[ftmp4], %[ftmp2] \n\t"
340  "punpcklwd %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
341  "punpckhwd %[ftmp2], %[ftmp3], %[ftmp7] \n\t"
342  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
343  MMI_LDC1(%[ftmp7], %[block], 0x08)
344  "dmfc1 %[tmp5], %[ftmp4] \n\t"
345  "mov.d %[ftmp10], %[ftmp1] \n\t"
346  "mov.d %[ftmp12], %[ftmp3] \n\t"
347  "mov.d %[ftmp14], %[ftmp2] \n\t"
348  "punpckhhw %[ftmp2], %[ftmp0], %[ftmp6] \n\t"
349  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
350  "punpckhhw %[ftmp6], %[ftmp5], %[ftmp7] \n\t"
351  "punpcklhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
352  "punpckhwd %[ftmp7], %[ftmp0], %[ftmp5] \n\t"
353  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
354  "punpckhwd %[ftmp5], %[ftmp2], %[ftmp6] \n\t"
355  "punpcklwd %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
356  "dmfc1 %[tmp6], %[ftmp0] \n\t"
357  "mov.d %[ftmp11], %[ftmp7] \n\t"
358  "mov.d %[ftmp13], %[ftmp2] \n\t"
359  "mov.d %[ftmp15], %[ftmp5] \n\t"
 /* addr0 = dst + 4: base address used later for the second group of four
  * bytes per destination row. */
360  PTR_ADDIU "%[addr0], %[dst], 0x04 \n\t"
 /* Second pass producing the values that are added at dst below. Inputs
  * come from $sp+0x00/0x10, tmp1/tmp3/tmp5 and ftmp10/ftmp12/ftmp14. */
361  "mov.d %[ftmp7], %[ftmp10] \n\t"
362  "dmtc1 %[tmp3], %[ftmp6] \n\t"
363  MMI_LDC1(%[ftmp1], $sp, 0x10)
364  "dmtc1 %[tmp1], %[ftmp3] \n\t"
365  "mov.d %[ftmp4], %[ftmp1] \n\t"
366  "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
367  "psrah %[ftmp0], %[ftmp7], %[ftmp8] \n\t"
368  "paddh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
369  "paddh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
370  "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
371  "paddh %[ftmp0], %[ftmp0], %[ftmp14] \n\t"
372  "paddh %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
373  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
374  "psubh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
375  "psubh %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
376  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
377  "paddh %[ftmp4], %[ftmp4], %[ftmp14] \n\t"
378  "psubh %[ftmp7], %[ftmp7], %[ftmp14] \n\t"
379  "psrah %[ftmp5], %[ftmp14], %[ftmp8] \n\t"
380  "psubh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
381  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
382  "mov.d %[ftmp5], %[ftmp1] \n\t"
383  "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
384  "psrah %[ftmp6], %[ftmp0], %[ftmp9] \n\t"
385  "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
386  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
387  "psrah %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
388  "psrah %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
389  "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
390  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
391  "mov.d %[ftmp7], %[ftmp12] \n\t"
392  "psrah %[ftmp2], %[ftmp12], %[ftmp8] \n\t"
393  "psrah %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
394  "paddh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
395  "psubh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
396  MMI_LDC1(%[ftmp3], $sp, 0x00)
397  "dmtc1 %[tmp5], %[ftmp7] \n\t"
398  "paddh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
399  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
400  "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
401  "psubh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
402  "paddh %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
403  "paddh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
404  "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
405  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
406  "paddh %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
407  "psubh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
408  "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
409  "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
410  "psubh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
411  "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
412  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
413  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
414  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
415  "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
416  "psubh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
417  "paddh %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
 /* Three of the eight results are spilled ($sp+0x00, $sp+0x10, tmp1) so
  * that ftmp2 can serve as the zero register from here on. */
418  MMI_SDC1(%[ftmp3], $sp, 0x00)
419  "psubh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
420  MMI_SDC1(%[ftmp0], $sp, 0x10)
421  "dmfc1 %[tmp1], %[ftmp2] \n\t"
422  "xor %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
 /* Zero the whole coefficient block: 16 stores of 8 bytes = 128 bytes. */
423  MMI_SDC1(%[ftmp2], %[block], 0x00)
424  MMI_SDC1(%[ftmp2], %[block], 0x08)
425  MMI_SDC1(%[ftmp2], %[block], 0x10)
426  MMI_SDC1(%[ftmp2], %[block], 0x18)
427  MMI_SDC1(%[ftmp2], %[block], 0x20)
428  MMI_SDC1(%[ftmp2], %[block], 0x28)
429  MMI_SDC1(%[ftmp2], %[block], 0x30)
430  MMI_SDC1(%[ftmp2], %[block], 0x38)
431  MMI_SDC1(%[ftmp2], %[block], 0x40)
432  MMI_SDC1(%[ftmp2], %[block], 0x48)
433  MMI_SDC1(%[ftmp2], %[block], 0x50)
434  MMI_SDC1(%[ftmp2], %[block], 0x58)
435  MMI_SDC1(%[ftmp2], %[block], 0x60)
436  MMI_SDC1(%[ftmp2], %[block], 0x68)
437  MMI_SDC1(%[ftmp2], %[block], 0x70)
438  MMI_SDC1(%[ftmp2], %[block], 0x78)
 /* ftmp10 = 6: shift count applied to every result before it is added to
  * the pixels. */
439  "dli %[tmp3], 0x06 \n\t"
440  "mtc1 %[tmp3], %[ftmp10] \n\t"
 /* Rows 0 and 1 at dst: widen four pixels per row, add the shifted
  * result, pack back to bytes and store. */
441  MMI_ULWC1(%[ftmp3], %[dst], 0x00)
442  MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
443  "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
444  "psrah %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
445  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
446  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
447  "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
448  "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
449  "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
450  "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
451  MMI_SWC1(%[ftmp3], %[dst], 0x00)
452  MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
 /* dst += 2 * stride; rows 2 and 3. */
453  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
454  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
455  MMI_ULWC1(%[ftmp3], %[dst], 0x00)
456  MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
457  "psrah %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
458  "psrah %[ftmp1], %[ftmp1], %[ftmp10] \n\t"
459  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
460  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
461  "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
462  "paddh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
463  "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
464  "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
465  MMI_SWC1(%[ftmp3], %[dst], 0x00)
466  MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
 /* Reload the spilled results; dst += 2 * stride; rows 4 and 5. */
467  MMI_LDC1(%[ftmp5], $sp, 0x00)
468  MMI_LDC1(%[ftmp4], $sp, 0x10)
469  "dmtc1 %[tmp1], %[ftmp6] \n\t"
470  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
471  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
472  MMI_ULWC1(%[ftmp3], %[dst], 0x00)
473  MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
474  "psrah %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
475  "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
476  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
477  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
478  "paddh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
479  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
480  "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
481  "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
482  MMI_SWC1(%[ftmp3], %[dst], 0x00)
483  MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
 /* dst += 2 * stride; rows 6 and 7. */
484  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
485  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
486  MMI_ULWC1(%[ftmp3], %[dst], 0x00)
487  MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
488  "psrah %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
489  "psrah %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
490  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
491  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
492  "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
493  "paddh %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
494  "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
495  "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
496  MMI_SWC1(%[ftmp3], %[dst], 0x00)
497  MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
 /* Second pass producing the values that are added at addr0 (dst + 4).
  * Inputs come from $sp+0x08/0x18, tmp2/tmp4/tmp6 and
  * ftmp11/ftmp13/ftmp15. */
498  "dmtc1 %[tmp4], %[ftmp1] \n\t"
499  "dmtc1 %[tmp2], %[ftmp6] \n\t"
500  MMI_LDC1(%[ftmp4], $sp, 0x18)
501  "mov.d %[ftmp5], %[ftmp4] \n\t"
502  "psrah %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
503  "psrah %[ftmp7], %[ftmp11], %[ftmp8] \n\t"
504  "paddh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
505  "paddh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
506  "paddh %[ftmp7], %[ftmp7], %[ftmp15] \n\t"
507  "paddh %[ftmp4], %[ftmp4], %[ftmp11] \n\t"
508  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
509  "paddh %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
510  "psubh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
511  "psubh %[ftmp3], %[ftmp11], %[ftmp1] \n\t"
512  "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
513  "paddh %[ftmp5], %[ftmp5], %[ftmp15] \n\t"
514  "psubh %[ftmp3], %[ftmp3], %[ftmp15] \n\t"
515  "psrah %[ftmp2], %[ftmp15], %[ftmp8] \n\t"
516  "psubh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
517  "psubh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
518  "mov.d %[ftmp2], %[ftmp4] \n\t"
519  "psrah %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
520  "psrah %[ftmp1], %[ftmp7], %[ftmp9] \n\t"
521  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
522  "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
523  "psrah %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
524  "psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
525  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
526  "psubh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
527  "mov.d %[ftmp3], %[ftmp13] \n\t"
528  "psrah %[ftmp0], %[ftmp13], %[ftmp8] \n\t"
529  "psrah %[ftmp7], %[ftmp6], %[ftmp8] \n\t"
530  "paddh %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
531  "psubh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
532  MMI_LDC1(%[ftmp6], $sp, 0x08)
533  "dmtc1 %[tmp6], %[ftmp3] \n\t"
534  "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
535  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
536  "paddh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
537  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
538  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
539  "paddh %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
540  "psubh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
541  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
542  "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
543  "psubh %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
544  "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
545  "paddh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
546  "psubh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
547  "paddh %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
548  "paddh %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
549  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
550  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
551  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
552  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
553  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
 /* Three of the eight results are spilled ($sp+0x08, $sp+0x18, tmp2) so
  * that ftmp0 can serve as the zero register from here on. */
554  MMI_SDC1(%[ftmp6], $sp, 0x08)
555  "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
556  MMI_SDC1(%[ftmp7], $sp, 0x18)
557  "dmfc1 %[tmp2], %[ftmp0] \n\t"
558  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
 /* Rows 0 and 1 at addr0: same widen / add / pack / store sequence. */
559  MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
560  MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
561  "psrah %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
562  "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
563  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
564  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
565  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
566  "paddh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
567  "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
568  "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
569  MMI_SWC1(%[ftmp6], %[addr0], 0x00)
570  MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
 /* addr0 += 2 * stride; rows 2 and 3. */
571  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
572  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
573  MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
574  MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
575  "psrah %[ftmp1], %[ftmp1], %[ftmp10] \n\t"
576  "psrah %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
577  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
578  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
579  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
580  "paddh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
581  "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
582  "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
583  MMI_SWC1(%[ftmp6], %[addr0], 0x00)
584  MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
 /* Reload the spilled results; addr0 += 2 * stride; rows 4 and 5. */
585  MMI_LDC1(%[ftmp2], $sp, 0x08)
586  MMI_LDC1(%[ftmp5], $sp, 0x18)
587  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
588  "dmtc1 %[tmp2], %[ftmp1] \n\t"
589  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
590  MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
591  MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
592  "psrah %[ftmp3], %[ftmp3], %[ftmp10] \n\t"
593  "psrah %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
594  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
595  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
596  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
597  "paddh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
598  "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
599  "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
600  MMI_SWC1(%[ftmp6], %[addr0], 0x00)
601  MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
 /* addr0 += 2 * stride; rows 6 and 7. */
602  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
603  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
604  MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
605  MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
606  "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
607  "psrah %[ftmp1], %[ftmp1], %[ftmp10] \n\t"
608  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
609  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
610  "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
611  "paddh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
612  "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
613  "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
614  MMI_SWC1(%[ftmp6], %[addr0], 0x00)
615  MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
 /* Release the 0x20 bytes of scratch reserved at the top. */
616  PTR_ADDIU "$sp, $sp, 0x20 \n\t"
617  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
618  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
619  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
620  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
621  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
622  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
623  [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
624  [ftmp14]"=&f"(ftmp[14]), [ftmp15]"=&f"(ftmp[15]),
625  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
626  [tmp2]"=&r"(tmp[2]), [tmp3]"=&r"(tmp[3]),
627  [tmp4]"=&r"(tmp[4]), [tmp5]"=&r"(tmp[5]),
628  [tmp6]"=&r"(tmp[6]),
629  RESTRICT_ASM_LOW32
630  RESTRICT_ASM_ADDRT
631  [addr0]"=&r"(addr[0])
632  : [dst]"r"(dst), [block]"r"(block),
633  [stride]"r"((mips_reg)stride)
634  : "memory"
635  );
636 
637 }
638 
640 {
641  int dc = (block[0] + 32) >> 6;
642  double ftmp[6];
643  DECLARE_VAR_LOW32;
644 
645  block[0] = 0;
646 
647  __asm__ volatile (
648  "mtc1 %[dc], %[ftmp5] \n\t"
649  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
650  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
651  MMI_ULWC1(%[ftmp1], %[dst0], 0x00)
652  MMI_ULWC1(%[ftmp2], %[dst1], 0x00)
653  MMI_ULWC1(%[ftmp3], %[dst2], 0x00)
654  MMI_ULWC1(%[ftmp4], %[dst3], 0x00)
655  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
656  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
657  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
658  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
659  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
660  "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
661  "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
662  "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
663  "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
664  "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
665  "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
666  "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
667  MMI_SWC1(%[ftmp1], %[dst0], 0x00)
668  MMI_SWC1(%[ftmp2], %[dst1], 0x00)
669  MMI_SWC1(%[ftmp3], %[dst2], 0x00)
670  MMI_SWC1(%[ftmp4], %[dst3], 0x00)
671  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
672  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
673  [ftmp4]"=&f"(ftmp[4]),
674  RESTRICT_ASM_LOW32
675  [ftmp5]"=&f"(ftmp[5])
676  : [dst0]"r"(dst), [dst1]"r"(dst+stride),
677  [dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride),
678  [dc]"r"(dc)
679  : "memory"
680  );
681 }
682 
684 {
685  int dc = (block[0] + 32) >> 6;
686  double ftmp[10];
687  DECLARE_VAR_ALL64;
688 
689  block[0] = 0;
690 
691  __asm__ volatile (
692  "mtc1 %[dc], %[ftmp5] \n\t"
693  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
694  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
695  MMI_LDC1(%[ftmp1], %[dst0], 0x00)
696  MMI_LDC1(%[ftmp2], %[dst1], 0x00)
697  MMI_LDC1(%[ftmp3], %[dst2], 0x00)
698  MMI_LDC1(%[ftmp4], %[dst3], 0x00)
699  "punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
700  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
701  "punpckhbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
702  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
703  "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t"
704  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
705  "punpckhbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
706  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
707  "paddsh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
708  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
709  "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
710  "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
711  "paddsh %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
712  "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
713  "paddsh %[ftmp9], %[ftmp9], %[ftmp5] \n\t"
714  "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
715  "packushb %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
716  "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
717  "packushb %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
718  "packushb %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
719  MMI_SDC1(%[ftmp1], %[dst0], 0x00)
720  MMI_SDC1(%[ftmp2], %[dst1], 0x00)
721  MMI_SDC1(%[ftmp3], %[dst2], 0x00)
722  MMI_SDC1(%[ftmp4], %[dst3], 0x00)
723 
724  MMI_LDC1(%[ftmp1], %[dst4], 0x00)
725  MMI_LDC1(%[ftmp2], %[dst5], 0x00)
726  MMI_LDC1(%[ftmp3], %[dst6], 0x00)
727  MMI_LDC1(%[ftmp4], %[dst7], 0x00)
728  "punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
729  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
730  "punpckhbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
731  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
732  "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t"
733  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
734  "punpckhbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
735  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
736  "paddsh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
737  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
738  "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
739  "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
740  "paddsh %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
741  "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
742  "paddsh %[ftmp9], %[ftmp9], %[ftmp5] \n\t"
743  "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
744  "packushb %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
745  "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
746  "packushb %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
747  "packushb %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
748  MMI_SDC1(%[ftmp1], %[dst4], 0x00)
749  MMI_SDC1(%[ftmp2], %[dst5], 0x00)
750  MMI_SDC1(%[ftmp3], %[dst6], 0x00)
751  MMI_SDC1(%[ftmp4], %[dst7], 0x00)
752  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
753  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
754  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
755  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
756  [ftmp8]"=&f"(ftmp[8]),
757  RESTRICT_ASM_ALL64
758  [ftmp9]"=&f"(ftmp[9])
759  : [dst0]"r"(dst), [dst1]"r"(dst+stride),
760  [dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride),
761  [dst4]"r"(dst+4*stride), [dst5]"r"(dst+5*stride),
762  [dst6]"r"(dst+6*stride), [dst7]"r"(dst+7*stride),
763  [dc]"r"(dc)
764  : "memory"
765  );
766 }
767 
768 void ff_h264_idct_add16_8_mmi(uint8_t *dst, const int *block_offset,
769  int16_t *block, int stride, const uint8_t nnzc[15*8])
770 {
771  int i;
772  for(i=0; i<16; i++){
773  int nnz = nnzc[ scan8[i] ];
774  if(nnz){
775  if(nnz==1 && ((int16_t*)block)[i*16])
776  ff_h264_idct_dc_add_8_mmi(dst + block_offset[i], block + i*16,
777  stride);
778  else
779  ff_h264_idct_add_8_mmi(dst + block_offset[i], block + i*16,
780  stride);
781  }
782  }
783 }
784 
785 void ff_h264_idct_add16intra_8_mmi(uint8_t *dst, const int *block_offset,
786  int16_t *block, int stride, const uint8_t nnzc[15*8])
787 {
788  int i;
789  for(i=0; i<16; i++){
790  if(nnzc[ scan8[i] ])
791  ff_h264_idct_add_8_mmi(dst + block_offset[i], block + i*16, stride);
792  else if(((int16_t*)block)[i*16])
793  ff_h264_idct_dc_add_8_mmi(dst + block_offset[i], block + i*16,
794  stride);
795  }
796 }
797 
798 void ff_h264_idct8_add4_8_mmi(uint8_t *dst, const int *block_offset,
799  int16_t *block, int stride, const uint8_t nnzc[15*8])
800 {
801  int i;
802  for(i=0; i<16; i+=4){
803  int nnz = nnzc[ scan8[i] ];
804  if(nnz){
805  if(nnz==1 && ((int16_t*)block)[i*16])
806  ff_h264_idct8_dc_add_8_mmi(dst + block_offset[i],
807  block + i*16, stride);
808  else
809  ff_h264_idct8_add_8_mmi(dst + block_offset[i], block + i*16,
810  stride);
811  }
812  }
813 }
814 
815 void ff_h264_idct_add8_8_mmi(uint8_t **dest, const int *block_offset,
816  int16_t *block, int stride, const uint8_t nnzc[15*8])
817 {
818  int i, j;
819  for(j=1; j<3; j++){
820  for(i=j*16; i<j*16+4; i++){
821  if(nnzc[ scan8[i] ])
822  ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i],
823  block + i*16, stride);
824  else if(((int16_t*)block)[i*16])
825  ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i],
826  block + i*16, stride);
827  }
828  }
829 }
830 
831 void ff_h264_idct_add8_422_8_mmi(uint8_t **dest, const int *block_offset,
832  int16_t *block, int stride, const uint8_t nnzc[15*8])
833 {
834  int i, j;
835 
836  for(j=1; j<3; j++){
837  for(i=j*16; i<j*16+4; i++){
838  if(nnzc[ scan8[i] ])
839  ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i],
840  block + i*16, stride);
841  else if(((int16_t*)block)[i*16])
842  ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i],
843  block + i*16, stride);
844  }
845  }
846 
847  for(j=1; j<3; j++){
848  for(i=j*16+4; i<j*16+8; i++){
849  if(nnzc[ scan8[i+4] ])
850  ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i+4],
851  block + i*16, stride);
852  else if(((int16_t*)block)[i*16])
853  ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i+4],
854  block + i*16, stride);
855  }
856  }
857 }
858 
860  int qmul)
861 {
    /*
     * Scaled add/subtract transform of a 4x4 group of 16-bit coefficients.
     *
     * Reads 16 int16_t values (32 bytes) from `input`, runs two butterfly
     * passes with a halfword/word regrouping in between, multiplies by `qmul`
     * with rounding, and stores the 16 results as single int16_t values at
     * byte offsets 0x00, 0x20, ..., 0x1e0 from `output`.
     *
     * The `input` and `qmul` registers are reused as scratch inside the asm
     * (both are declared "+&r"), so neither holds its original value
     * afterwards.
     *
     * NOTE(review): the line carrying the function name and the `output` /
     * `input` parameter declarations is missing directly above this block;
     * presumably this is the luma DC dequant routine - confirm against the
     * header.
     */
862  double ftmp[10];
863  uint64_t tmp[2];
864  DECLARE_VAR_ALL64;
865 
866  __asm__ volatile (
    /* noreorder: the instruction after each branch/jump is its delay slot */
867  ".set noreorder \n\t"
    /* ftmp8 = 8 (fast-path shift), ftmp9 = 32 (used to reach the upper
     * word of a 64-bit register); the four loads fetch the 16 inputs */
868  "dli %[tmp0], 0x08 \n\t"
869  MMI_LDC1(%[ftmp3], %[input], 0x18)
870  "mtc1 %[tmp0], %[ftmp8] \n\t"
871  MMI_LDC1(%[ftmp2], %[input], 0x10)
872  "dli %[tmp0], 0x20 \n\t"
873  MMI_LDC1(%[ftmp1], %[input], 0x08)
874  "mtc1 %[tmp0], %[ftmp9] \n\t"
875  MMI_LDC1(%[ftmp0], %[input], 0x00)
    /* first butterfly pass: sums and differences between the four rows */
876  "mov.d %[ftmp4], %[ftmp3] \n\t"
877  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
878  "psubh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
879  "mov.d %[ftmp4], %[ftmp1] \n\t"
880  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
881  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
882  "mov.d %[ftmp4], %[ftmp3] \n\t"
883  "paddh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
884  "psubh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
885  "mov.d %[ftmp4], %[ftmp2] \n\t"
886  "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
887  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
    /* regroup halfwords, then words, across the four registers
     * (looks like a 4x4 transpose - confirm) */
888  "mov.d %[ftmp4], %[ftmp3] \n\t"
889  "punpcklhw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
890  "punpckhhw %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
891  "punpckhhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
892  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
893  "punpckhwd %[ftmp2], %[ftmp3], %[ftmp0] \n\t"
894  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
895  "mov.d %[ftmp0], %[ftmp4] \n\t"
896  "punpcklwd %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
897  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
    /* second butterfly pass; results end up in ftmp0, ftmp2, ftmp3, ftmp4 */
898  "mov.d %[ftmp1], %[ftmp0] \n\t"
899  "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
900  "psubh %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
901  "mov.d %[ftmp1], %[ftmp2] \n\t"
902  "paddh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
903  "psubh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
904  "mov.d %[ftmp1], %[ftmp0] \n\t"
905  "paddh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
906  "psubh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
907  "mov.d %[ftmp1], %[ftmp4] \n\t"
    /* tmp0 = qmul - 0x7fff: a positive value (qmul > 0x7fff) takes the
     * path at label 1; the psubh after bgtz is the delay slot and runs on
     * both paths */
908  "daddi %[tmp0], %[qmul], -0x7fff \n\t"
909  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
910  "bgtz %[tmp0], 1f \n\t"
911  "psubh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
    /* --- path for qmul <= 0x7fff ---
     * qmul += 0x800000 places 0x80 in the upper halfword of the 32-bit
     * multiplier.  Each coefficient is paired with a halfword of ff_pw_1
     * and pmaddhw multiplies the pairs by (qmul, 0x80); assuming ff_pw_1
     * holds halfword 1s (confirm) this gives coef * qmul + 128, which is
     * then shifted right arithmetically by 8 and packed to int16 with
     * signed saturation. */
912  "ori %[tmp0], $0, 0x80 \n\t"
913  "dsll %[tmp0], %[tmp0], 0x10 \n\t"
914  "punpckhhw %[ftmp1], %[ftmp0], %[ff_pw_1] \n\t"
915  "daddu %[qmul], %[qmul], %[tmp0] \n\t"
916  "punpcklhw %[ftmp0], %[ftmp0], %[ff_pw_1] \n\t"
917  "punpckhhw %[ftmp5], %[ftmp2], %[ff_pw_1] \n\t"
918  "punpcklhw %[ftmp2], %[ftmp2], %[ff_pw_1] \n\t"
919  "mtc1 %[qmul], %[ftmp7] \n\t"
920  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
921  "pmaddhw %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
922  "pmaddhw %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
923  "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
924  "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
925  "psraw %[ftmp0], %[ftmp0], %[ftmp8] \n\t"
926  "psraw %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
927  "psraw %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
928  "psraw %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
929  "packsswh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
930  "packsswh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
    /* scatter the four halfwords of each packed register: the low word goes
     * out through tmp1, the high word (after the shift by ftmp9 = 32)
     * through the input register, 16 bits at a time */
931  "dmfc1 %[tmp1], %[ftmp0] \n\t"
932  "dsrl %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
933  "mfc1 %[input], %[ftmp0] \n\t"
934  "sh %[tmp1], 0x00(%[output]) \n\t"
935  "sh %[input], 0x80(%[output]) \n\t"
936  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
937  PTR_SRL "%[input], %[input], 0x10 \n\t"
938  "sh %[tmp1], 0x20(%[output]) \n\t"
939  "sh %[input], 0xa0(%[output]) \n\t"
940  "dmfc1 %[tmp1], %[ftmp2] \n\t"
941  "dsrl %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
942  "mfc1 %[input], %[ftmp2] \n\t"
943  "sh %[tmp1], 0x40(%[output]) \n\t"
944  "sh %[input], 0xc0(%[output]) \n\t"
945  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
946  PTR_SRL "%[input], %[input], 0x10 \n\t"
947  "sh %[tmp1], 0x60(%[output]) \n\t"
948  "sh %[input], 0xe0(%[output]) \n\t"
    /* same multiply / shift / pack / scatter for the remaining two rows,
     * stored at offsets 0x100..0x1e0 */
949  "punpckhhw %[ftmp1], %[ftmp3], %[ff_pw_1] \n\t"
950  "punpcklhw %[ftmp3], %[ftmp3], %[ff_pw_1] \n\t"
951  "punpckhhw %[ftmp5], %[ftmp4], %[ff_pw_1] \n\t"
952  "punpcklhw %[ftmp4], %[ftmp4], %[ff_pw_1] \n\t"
953  "mtc1 %[qmul], %[ftmp7] \n\t"
954  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
955  "pmaddhw %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
956  "pmaddhw %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
957  "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
958  "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
959  "psraw %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
960  "psraw %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
961  "psraw %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
962  "psraw %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
963  "packsswh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
964  "packsswh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
965  "dmfc1 %[tmp1], %[ftmp3] \n\t"
966  "dsrl %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
967  "mfc1 %[input], %[ftmp3] \n\t"
968  "sh %[tmp1], 0x100(%[output]) \n\t"
969  "sh %[input], 0x180(%[output]) \n\t"
970  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
971  PTR_SRL "%[input], %[input], 0x10 \n\t"
972  "sh %[tmp1], 0x120(%[output]) \n\t"
973  "sh %[input], 0x1a0(%[output]) \n\t"
974  "dmfc1 %[tmp1], %[ftmp4] \n\t"
975  "dsrl %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
976  "mfc1 %[input], %[ftmp4] \n\t"
977  "sh %[tmp1], 0x140(%[output]) \n\t"
978  "sh %[input], 0x1c0(%[output]) \n\t"
979  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
980  PTR_SRL "%[input], %[input], 0x10 \n\t"
981  "sh %[tmp1], 0x160(%[output]) \n\t"
    /* jump to the exit; the final store below sits in the delay slot */
982  "j 2f \n\t"
983  "sh %[input], 0x1e0(%[output]) \n\t"
    /* --- path for qmul > 0x7fff ---
     * tmp1 = 31 - clz(qmul), then tmp0 = tmp1 - 7 and movn replaces tmp1
     * with 7 whenever tmp0 is non-zero; when tmp0 is zero tmp1 already
     * equals 7.  So tmp1 is 7 after the movn whatever it held before:
     * (qmul + 0x800000) is shifted right by 7 and the final arithmetic
     * shift (ftmp6) becomes 8 - 7 = 1.
     * NOTE(review): in a HAVE_LOONGSON2 build no clz is emitted, so tmp1 is
     * read before being written; the result is still 7 for the reason
     * above, but the read of an unset register deserves a second look. */
984  "1: \n\t"
985  "ori %[tmp0], $0, 0x1f \n\t"
986 #if HAVE_LOONGSON3
987  "clz %[tmp1], %[qmul] \n\t"
988 #elif HAVE_LOONGSON2
989 #endif
990  "ori %[input], $0, 0x07 \n\t"
991  "dsubu %[tmp1], %[tmp0], %[tmp1] \n\t"
992  "ori %[tmp0], $0, 0x80 \n\t"
993  "dsll %[tmp0], %[tmp0], 0x10 \n\t"
994  "daddu %[qmul], %[qmul], %[tmp0] \n\t"
995  "dsubu %[tmp0], %[tmp1], %[input] \n\t"
996  "movn %[tmp1], %[input], %[tmp0] \n\t"
997  PTR_ADDIU "%[input], %[input], 0x01 \n\t"
998  "andi %[tmp0], %[tmp1], 0xff \n\t"
999  "srlv %[qmul], %[qmul], %[tmp0] \n\t"
1000  PTR_SUBU "%[input], %[input], %[tmp1] \n\t"
1001  "mtc1 %[input], %[ftmp6] \n\t"
    /* multiply by the pre-shifted qmul, shift by ftmp6, pack and scatter:
     * same structure as the other path, stores interleaved differently */
1002  "punpckhhw %[ftmp1], %[ftmp0], %[ff_pw_1] \n\t"
1003  "punpcklhw %[ftmp0], %[ftmp0], %[ff_pw_1] \n\t"
1004  "punpckhhw %[ftmp5], %[ftmp2], %[ff_pw_1] \n\t"
1005  "punpcklhw %[ftmp2], %[ftmp2], %[ff_pw_1] \n\t"
1006  "mtc1 %[qmul], %[ftmp7] \n\t"
1007  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1008  "pmaddhw %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1009  "pmaddhw %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1010  "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1011  "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1012  "psraw %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
1013  "psraw %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1014  "psraw %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1015  "psraw %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1016  "packsswh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1017  "packsswh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
1018  "dmfc1 %[tmp1], %[ftmp0] \n\t"
1019  "dsrl %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
1020  "sh %[tmp1], 0x00(%[output]) \n\t"
1021  "mfc1 %[input], %[ftmp0] \n\t"
1022  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
1023  "sh %[input], 0x80(%[output]) \n\t"
1024  "sh %[tmp1], 0x20(%[output]) \n\t"
1025  PTR_SRL "%[input], %[input], 0x10 \n\t"
1026  "dmfc1 %[tmp1], %[ftmp2] \n\t"
1027  "sh %[input], 0xa0(%[output]) \n\t"
1028  "dsrl %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
1029  "sh %[tmp1], 0x40(%[output]) \n\t"
1030  "mfc1 %[input], %[ftmp2] \n\t"
1031  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
1032  "sh %[input], 0xc0(%[output]) \n\t"
1033  "sh %[tmp1], 0x60(%[output]) \n\t"
1034  PTR_SRL "%[input], %[input], 0x10 \n\t"
1035  "sh %[input], 0xe0(%[output]) \n\t"
1036  "punpckhhw %[ftmp1], %[ftmp3], %[ff_pw_1] \n\t"
1037  "punpcklhw %[ftmp3], %[ftmp3], %[ff_pw_1] \n\t"
1038  "punpckhhw %[ftmp5], %[ftmp4], %[ff_pw_1] \n\t"
1039  "punpcklhw %[ftmp4], %[ftmp4], %[ff_pw_1] \n\t"
1040  "mtc1 %[qmul], %[ftmp7] \n\t"
1041  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1042  "pmaddhw %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1043  "pmaddhw %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1044  "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1045  "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1046  "psraw %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
1047  "psraw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1048  "psraw %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1049  "psraw %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1050  "packsswh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
1051  "packsswh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1052  "dmfc1 %[tmp1], %[ftmp3] \n\t"
1053  "dsrl %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
1054  "mfc1 %[input], %[ftmp3] \n\t"
1055  "sh %[tmp1], 0x100(%[output]) \n\t"
1056  "sh %[input], 0x180(%[output]) \n\t"
1057  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
1058  PTR_SRL "%[input], %[input], 0x10 \n\t"
1059  "sh %[tmp1], 0x120(%[output]) \n\t"
1060  "sh %[input], 0x1a0(%[output]) \n\t"
1061  "dmfc1 %[tmp1], %[ftmp4] \n\t"
1062  "dsrl %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
1063  "mfc1 %[input], %[ftmp4] \n\t"
1064  "sh %[tmp1], 0x140(%[output]) \n\t"
1065  "sh %[input], 0x1c0(%[output]) \n\t"
1066  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
1067  PTR_SRL "%[input], %[input], 0x10 \n\t"
1068  "sh %[tmp1], 0x160(%[output]) \n\t"
1069  "sh %[input], 0x1e0(%[output]) \n\t"
1070  "2: \n\t"
1071  ".set reorder \n\t"
1072  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1073  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1074  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1075  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1076  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1077  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
1078  RESTRICT_ASM_ALL64
    /* input and qmul are read-write because the asm overwrites them */
1079  [output]"+&r"(output), [input]"+&r"(input),
1080  [qmul]"+&r"(qmul)
1081  : [ff_pw_1]"f"(ff_pw_1)
1082  : "memory"
1083  );
1084 }
1085 
/**
 * In-place scaled add/subtract transform of eight coefficients stored at
 * block[0], block[16], ..., block[112].
 *
 * Each output is (t * qmul + 128) >> 8, where t is one of the eight
 * sum/difference combinations of the inputs.  Sums and products are carried
 * out in unsigned arithmetic and converted back to int only for the final
 * shift, so extreme coefficients or qmul values wrap instead of triggering
 * signed-overflow undefined behaviour.
 *
 * NOTE(review): the signature line was missing from the reviewed text and is
 * restored here from the names used in the body - confirm against the header.
 *
 * @param block coefficient storage, read and rewritten at indices 0..112
 *              in steps of 16
 * @param qmul  multiplier applied to every transformed value
 */
void ff_h264_chroma422_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
{
    unsigned temp[8];
    unsigned t[8];

    /* sum and difference of each adjacent pair */
    temp[0] = block[0]  + (unsigned)block[16];
    temp[1] = block[0]  - (unsigned)block[16];
    temp[2] = block[32] + (unsigned)block[48];
    temp[3] = block[32] - (unsigned)block[48];
    temp[4] = block[64] + (unsigned)block[80];
    temp[5] = block[64] - (unsigned)block[80];
    temp[6] = block[96] + (unsigned)block[112];
    temp[7] = block[96] - (unsigned)block[112];

    /* combine the four pair-sums, then the four pair-differences */
    t[0] = temp[0] + temp[4] + temp[2] + temp[6];
    t[1] = temp[0] - temp[4] + temp[2] - temp[6];
    t[2] = temp[0] - temp[4] - temp[2] + temp[6];
    t[3] = temp[0] + temp[4] - temp[2] - temp[6];
    t[4] = temp[1] + temp[5] + temp[3] + temp[7];
    t[5] = temp[1] - temp[5] + temp[3] - temp[7];
    t[6] = temp[1] - temp[5] - temp[3] + temp[7];
    t[7] = temp[1] + temp[5] - temp[3] - temp[7];

    /* scale with rounding; the cast restores the sign before the shift */
    block[  0] = (int)(t[0] * qmul + 128) >> 8;
    block[ 32] = (int)(t[1] * qmul + 128) >> 8;
    block[ 64] = (int)(t[2] * qmul + 128) >> 8;
    block[ 96] = (int)(t[3] * qmul + 128) >> 8;
    block[ 16] = (int)(t[4] * qmul + 128) >> 8;
    block[ 48] = (int)(t[5] * qmul + 128) >> 8;
    block[ 80] = (int)(t[6] * qmul + 128) >> 8;
    block[112] = (int)(t[7] * qmul + 128) >> 8;
}
1118 
/**
 * In-place scaled 2x2 add/subtract transform of the coefficients stored at
 * block[0], block[16], block[32] and block[48].
 *
 * Each output is (combination * qmul) >> 7.  The sums and the product are
 * evaluated in unsigned arithmetic and converted back to int only for the
 * shift, so extreme inputs wrap instead of triggering signed-overflow
 * undefined behaviour.
 *
 * NOTE(review): the signature line was missing from the reviewed text and is
 * restored here from the names used in the body - confirm against the header.
 *
 * @param block coefficient storage, read and rewritten at indices 0, 16, 32, 48
 * @param qmul  multiplier applied to every transformed value
 */
void ff_h264_chroma_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
{
    unsigned a, b, c, d;

    d = block[0]  - (unsigned)block[16];
    a = block[0]  + (unsigned)block[16];
    b = block[32] - (unsigned)block[48];
    c = block[32] + (unsigned)block[48];

    /* the cast restores the sign before the arithmetic shift */
    block[0]  = (int)((a + c) * qmul) >> 7;
    block[16] = (int)((d + b) * qmul) >> 7;
    block[32] = (int)((a - c) * qmul) >> 7;
    block[48] = (int)((d - b) * qmul) >> 7;
}
1132 
1134  int log2_denom, int weight, int offset)
1135 {
1136  int y;
1137  double ftmp[8];
1138  DECLARE_VAR_ALL64;
1139 
1140  offset <<= log2_denom;
1141 
1142  if (log2_denom)
1143  offset += 1 << (log2_denom - 1);
1144 
1145  for (y=0; y<height; y++, block+=stride) {
1146  __asm__ volatile (
1147  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1148  MMI_LDC1(%[ftmp1], %[block0], 0x00)
1149  MMI_LDC1(%[ftmp2], %[block1], 0x00)
1150  "mtc1 %[weight], %[ftmp3] \n\t"
1151  "mtc1 %[offset], %[ftmp4] \n\t"
1152  "mtc1 %[log2_denom], %[ftmp5] \n\t"
1153  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1154  "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1155  "punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
1156  "punpckhbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
1157  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1158  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1159  "pmullh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1160  "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1161  "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1162  "pmullh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
1163  "paddsh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1164  "paddsh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
1165  "paddsh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
1166  "paddsh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1167  "psrah %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1168  "psrah %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1169  "psrah %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1170  "psrah %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
1171  "packushb %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1172  "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1173  MMI_SDC1(%[ftmp1], %[block0], 0x00)
1174  MMI_SDC1(%[ftmp2], %[block1], 0x00)
1175  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1176  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1177  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1178  [ftmp6]"=&f"(ftmp[6]),
1179  RESTRICT_ASM_ALL64
1180  [ftmp7]"=&f"(ftmp[7])
1181  : [block0]"r"(block), [block1]"r"(block+8),
1182  [weight]"r"(weight), [offset]"r"(offset),
1183  [log2_denom]"r"(log2_denom)
1184  : "memory"
1185  );
1186  }
1187 }
1188 
1190  ptrdiff_t stride, int height, int log2_denom, int weightd, int weights,
1191  int offset)
1192 {
1193  int y;
1194  double ftmp[9];
1195  DECLARE_VAR_ALL64;
1196 
1197  offset = ((offset + 1) | 1) << log2_denom;
1198 
1199  for (y=0; y<height; y++, dst+=stride, src+=stride) {
1200  __asm__ volatile (
1201  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1202  MMI_LDC1(%[ftmp1], %[src0], 0x00)
1203  MMI_LDC1(%[ftmp2], %[dst0], 0x00)
1204  "mtc1 %[weights], %[ftmp3] \n\t"
1205  "mtc1 %[weightd], %[ftmp4] \n\t"
1206  "mtc1 %[offset], %[ftmp5] \n\t"
1207  "mtc1 %[log2_denom], %[ftmp6] \n\t"
1208  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1209  "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1210  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1211  "punpckhbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
1212  "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
1213  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1214  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1215  "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1216  "pmullh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
1217  "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1218  "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1219  "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1220  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1221  "paddsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1222  "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1223  "psrah %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1224  "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1225  "packushb %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1226  MMI_SDC1(%[ftmp1], %[dst0], 0x00)
1227  MMI_LDC1(%[ftmp1], %[src1], 0x00)
1228  MMI_LDC1(%[ftmp2], %[dst1], 0x00)
1229  "punpckhbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
1230  "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
1231  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1232  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1233  "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1234  "pmullh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
1235  "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1236  "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1237  "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1238  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1239  "paddsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1240  "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1241  "psrah %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1242  "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1243  "packushb %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1244  MMI_SDC1(%[ftmp1], %[dst1], 0x00)
1245  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1246  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1247  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1248  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1249  RESTRICT_ASM_ALL64
1250  [ftmp8]"=&f"(ftmp[8])
1251  : [dst0]"r"(dst), [dst1]"r"(dst+8),
1252  [src0]"r"(src), [src1]"r"(src+8),
1253  [weights]"r"(weights), [weightd]"r"(weightd),
1254  [offset]"r"(offset), [log2_denom]"r"(log2_denom+1)
1255  : "memory"
1256  );
1257  }
1258 }
1259 
1261  int log2_denom, int weight, int offset)
1262 {
1263  int y;
1264  double ftmp[6];
1265  DECLARE_VAR_ALL64;
1266 
1267  offset <<= log2_denom;
1268 
1269  if (log2_denom)
1270  offset += 1 << (log2_denom - 1);
1271 
1272  for (y=0; y<height; y++, block+=stride) {
1273  __asm__ volatile (
1274  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1275  MMI_LDC1(%[ftmp1], %[block], 0x00)
1276  "mtc1 %[weight], %[ftmp2] \n\t"
1277  "mtc1 %[offset], %[ftmp3] \n\t"
1278  "mtc1 %[log2_denom], %[ftmp5] \n\t"
1279  "pshufh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1280  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1281  "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t"
1282  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1283  "pmullh %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
1284  "pmullh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1285  "paddsh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
1286  "paddsh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1287  "psrah %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1288  "psrah %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1289  "packushb %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
1290  MMI_SDC1(%[ftmp1], %[block], 0x00)
1291  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1292  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1293  [ftmp4]"=&f"(ftmp[4]),
1294  RESTRICT_ASM_ALL64
1295  [ftmp5]"=&f"(ftmp[5])
1296  : [block]"r"(block), [weight]"r"(weight),
1297  [offset]"r"(offset), [log2_denom]"r"(log2_denom)
1298  : "memory"
1299  );
1300  }
1301 }
1302 
1304  ptrdiff_t stride, int height, int log2_denom, int weightd, int weights,
1305  int offset)
1306 {
1307  int y;
1308  double ftmp[9];
1309  DECLARE_VAR_ALL64;
1310 
1311  offset = ((offset + 1) | 1) << log2_denom;
1312 
1313  for (y=0; y<height; y++, dst+=stride, src+=stride) {
1314  __asm__ volatile (
1315  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1316  MMI_LDC1(%[ftmp1], %[src], 0x00)
1317  MMI_LDC1(%[ftmp2], %[dst], 0x00)
1318  "mtc1 %[weights], %[ftmp3] \n\t"
1319  "mtc1 %[weightd], %[ftmp4] \n\t"
1320  "mtc1 %[offset], %[ftmp5] \n\t"
1321  "mtc1 %[log2_denom], %[ftmp6] \n\t"
1322  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1323  "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1324  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1325  "punpckhbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
1326  "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
1327  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1328  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1329  "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1330  "pmullh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
1331  "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1332  "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1333  "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1334  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1335  "paddsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1336  "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1337  "psrah %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1338  "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1339  "packushb %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1340  MMI_SDC1(%[ftmp1], %[dst], 0x00)
1341  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1342  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1343  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1344  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1345  RESTRICT_ASM_ALL64
1346  [ftmp8]"=&f"(ftmp[8])
1347  : [dst]"r"(dst), [src]"r"(src),
1348  [weights]"r"(weights), [weightd]"r"(weightd),
1349  [offset]"r"(offset), [log2_denom]"r"(log2_denom+1)
1350  : "memory"
1351  );
1352  }
1353 }
1354 
1356  int log2_denom, int weight, int offset)
1357 {
1358  int y;
1359  double ftmp[5];
1360  DECLARE_VAR_LOW32;
1361 
1362  offset <<= log2_denom;
1363 
1364  if (log2_denom)
1365  offset += 1 << (log2_denom - 1);
1366 
1367  for (y=0; y<height; y++, block+=stride) {
1368  __asm__ volatile (
1369  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1370  MMI_ULWC1(%[ftmp1], %[block], 0x00)
1371  "mtc1 %[weight], %[ftmp2] \n\t"
1372  "mtc1 %[offset], %[ftmp3] \n\t"
1373  "mtc1 %[log2_denom], %[ftmp4] \n\t"
1374  "pshufh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1375  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1376  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1377  "pmullh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1378  "paddsh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1379  "psrah %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
1380  "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1381  MMI_SWC1(%[ftmp1], %[block], 0x00)
1382  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1383  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1384  RESTRICT_ASM_LOW32
1385  [ftmp4]"=&f"(ftmp[4])
1386  : [block]"r"(block), [weight]"r"(weight),
1387  [offset]"r"(offset), [log2_denom]"r"(log2_denom)
1388  : "memory"
1389  );
1390  }
1391 }
1392 
1394  ptrdiff_t stride, int height, int log2_denom, int weightd, int weights,
1395  int offset)
1396 {
1397  int y;
1398  double ftmp[7];
1399  DECLARE_VAR_LOW32;
1400 
1401  offset = ((offset + 1) | 1) << log2_denom;
1402 
1403  for (y=0; y<height; y++, dst+=stride, src+=stride) {
1404  __asm__ volatile (
1405  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1406  MMI_ULWC1(%[ftmp1], %[src], 0x00)
1407  MMI_ULWC1(%[ftmp2], %[dst], 0x00)
1408  "mtc1 %[weight], %[ftmp3] \n\t"
1409  "mtc1 %[weightd], %[ftmp4] \n\t"
1410  "mtc1 %[offset], %[ftmp5] \n\t"
1411  "mtc1 %[log2_denom], %[ftmp6] \n\t"
1412  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1413  "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1414  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1415  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1416  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1417  "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1418  "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1419  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1420  "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1421  "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1422  "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1423  MMI_SWC1(%[ftmp1], %[dst], 0x00)
1424  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1425  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1426  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1427  RESTRICT_ASM_LOW32
1428  [ftmp6]"=&f"(ftmp[6])
1429  : [dst]"r"(dst), [src]"r"(src),
1430  [weight]"r"(weights), [weightd]"r"(weightd),
1431  [offset]"r"(offset), [log2_denom]"r"(log2_denom+1)
1432  : "memory"
1433  );
1434  }
1435 }
1436 
1437 void ff_deblock_v8_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
1438  int8_t *tc0)
1439 {
1440  double ftmp[12];
1441  mips_reg addr[2];
1442  DECLARE_VAR_LOW32;
1443  DECLARE_VAR_ALL64;
1444  DECLARE_VAR_ADDRT;
1445 
1446  __asm__ volatile (
1447  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
1448  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1449  PTR_ADDU "%[addr1], %[stride], %[addr0] \n\t"
1450  "addi %[alpha], %[alpha], -0x01 \n\t"
1451  PTR_SUBU "%[addr1], $0, %[addr1] \n\t"
1452  "addi %[beta], %[beta], -0x01 \n\t"
1453  PTR_ADDU "%[addr1], %[addr1], %[pix] \n\t"
1454  MMI_LDC1(%[ftmp3], %[pix], 0x00)
1455  MMI_LDXC1(%[ftmp1], %[addr1], %[stride], 0x00)
1456  MMI_LDXC1(%[ftmp2], %[addr1], %[addr0], 0x00)
1457  MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1458  "mtc1 %[alpha], %[ftmp5] \n\t"
1459  "mtc1 %[beta], %[ftmp6] \n\t"
1460  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1461  "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1462  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1463  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1464  "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t"
1465  "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1466  "or %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1467  "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
1468  "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1469  "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1470  "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1471  "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1472  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1473  "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1474  "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t"
1475  "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1476  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1477  "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1478  "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
1479  "pcmpeqb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
1480  MMI_ULWC1(%[ftmp5], %[tc0], 0x00)
1481  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1482  "punpcklbh %[ftmp9], %[ftmp5], %[ftmp5] \n\t"
1483  "pcmpgtb %[ftmp5], %[ftmp9], %[ftmp4] \n\t"
1484  MMI_LDC1(%[ftmp4], %[addr1], 0x00)
1485  "and %[ftmp10], %[ftmp5], %[ftmp8] \n\t"
1486  "psubusb %[ftmp8], %[ftmp4], %[ftmp2] \n\t"
1487  "psubusb %[ftmp7], %[ftmp2], %[ftmp4] \n\t"
1488  "psubusb %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1489  "psubusb %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1490  "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1491  "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1492  "and %[ftmp5], %[ftmp10], %[ftmp9] \n\t"
1493  "psubb %[ftmp8], %[ftmp5], %[ftmp7] \n\t"
1494  "and %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1495  "pavgb %[ftmp5], %[ftmp2], %[ftmp3] \n\t"
1496  MMI_LDC1(%[ftmp11], %[addr1], 0x00)
1497  "pavgb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1498  "xor %[ftmp5], %[ftmp5], %[ftmp11] \n\t"
1499  "and %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t"
1500  "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1501  "psubusb %[ftmp5], %[ftmp1], %[ftmp7] \n\t"
1502  "paddusb %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
1503  "pmaxub %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1504  "pminub %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1505  MMI_SDXC1(%[ftmp4], %[addr1], %[stride], 0x00)
1506  MMI_LDXC1(%[ftmp5], %[pix], %[addr0], 0x00)
1507  "psubusb %[ftmp4], %[ftmp5], %[ftmp3] \n\t"
1508  "psubusb %[ftmp7], %[ftmp3], %[ftmp5] \n\t"
1509  "psubusb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1510  "psubusb %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1511  "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
1512  "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1513  "psubb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1514  "and %[ftmp6], %[ftmp9], %[ftmp7] \n\t"
1515  MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1516  "pavgb %[ftmp7], %[ftmp2], %[ftmp3] \n\t"
1517  MMI_LDXC1(%[ftmp11], %[pix], %[addr0], 0x00)
1518  "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1519  "xor %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
1520  "and %[ftmp7], %[ftmp7], %[ff_pb_1] \n\t"
1521  "psubusb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1522  "psubusb %[ftmp7], %[ftmp4], %[ftmp6] \n\t"
1523  "paddusb %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1524  "pmaxub %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1525  "pminub %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1526  MMI_SDXC1(%[ftmp5], %[pix], %[stride], 0x00)
1527  "xor %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1528  "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1529  "and %[ftmp6], %[ftmp6], %[ff_pb_1] \n\t"
1530  "xor %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1531  "xor %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
1532  "pavgb %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
1533  "pavgb %[ftmp4], %[ftmp4], %[ff_pb_3] \n\t"
1534  "pavgb %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
1535  "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1536  "paddusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1537  "psubusb %[ftmp7], %[ff_pb_A1], %[ftmp4] \n\t"
1538  "psubusb %[ftmp4], %[ftmp4], %[ff_pb_A1] \n\t"
1539  "pminub %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1540  "pminub %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
1541  "psubusb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1542  "psubusb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1543  "paddusb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1544  "paddusb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1545  MMI_SDXC1(%[ftmp2], %[addr1], %[addr0], 0x00)
1546  MMI_SDC1(%[ftmp3], %[pix], 0x00)
1547  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1548  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1549  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1550  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1551  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1552  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
1553  RESTRICT_ASM_LOW32
1554  RESTRICT_ASM_ALL64
1555  RESTRICT_ASM_ADDRT
1556  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
1557  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
1558  [alpha]"r"((mips_reg)alpha), [beta]"r"((mips_reg)beta),
1559  [tc0]"r"(tc0), [ff_pb_1]"f"(ff_pb_1),
1560  [ff_pb_3]"f"(ff_pb_3), [ff_pb_A1]"f"(ff_pb_A1)
1561  : "memory"
1562  );
1563 }
1564 
/**
 * Intra-mode luma deblocking of one 8-pixel-wide segment, filtering across
 * the boundary between row (pix - stride) and row pix, in place.
 *
 * Row naming used in the comments below (one 8-byte row each):
 *   p3 = pix - 4*stride, p2 = pix - 3*stride, p1 = pix - 2*stride,
 *   p0 = pix - stride,   q0 = pix,            q1 = pix + stride,
 *   q2 = pix + 2*stride, q3 = pix + 3*stride.
 * All eight rows are read; p2, p1, p0, q0, q1 and q2 are written back.
 *
 * @param pix    first pixel of row q0
 * @param stride distance in bytes between consecutive rows
 * @param alpha  threshold applied to |p0 - q0|; the asm jumps straight to its
 *               end label when alpha - 1 is negative
 * @param beta   threshold applied to the inner differences; same early exit
 *               when beta - 1 is negative
 *
 * NOTE(review): the scratch buffer "stack" is declared const although the asm
 * stores to it (offsets 0x00..0x40) -- confirm this is intentional.
 */
1565 static void deblock_v8_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
1566  int beta)
1567 {
1568  DECLARE_ALIGNED(8, const uint64_t, stack[0x0a]);
1569  double ftmp[16];
1570  uint64_t tmp[1];
1571  mips_reg addr[3];
1572  DECLARE_VAR_ALL64;
1573  DECLARE_VAR_ADDRT;
1574 
1575  __asm__ volatile (
        /* ftmp0 = 0, ftmp9 = 1 (shift count); addr0 = 4*stride,
         * addr2 = 2*stride; alpha and beta are decremented in place. */
1576  "ori %[tmp0], $0, 0x01 \n\t"
1577  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1578  "mtc1 %[tmp0], %[ftmp9] \n\t"
1579  PTR_SLL "%[addr0], %[stride], 0x02 \n\t"
1580  PTR_ADDU "%[addr2], %[stride], %[stride] \n\t"
1581  PTR_ADDIU "%[alpha], %[alpha], -0x01 \n\t"
        /* ftmp11 is used further down as a psrlh shift count; presumably this
         * yields 1 << 1 = 2 -- TODO confirm how PTR_SLL assembles with FP
         * register operands. */
1582  PTR_SLL "%[ftmp11], %[ftmp9], %[ftmp9] \n\t"
        /* Nothing to do when alpha - 1 < 0 or beta - 1 < 0. */
1583  "bltz %[alpha], 1f \n\t"
1584  PTR_ADDU "%[addr1], %[addr2], %[stride] \n\t"
1585  PTR_ADDIU "%[beta], %[beta], -0x01 \n\t"
1586  "bltz %[beta], 1f \n\t"
        /* addr0 = pix - 4*stride (row p3); addr1 = 3*stride. */
1587  PTR_SUBU "%[addr0], $0, %[addr0] \n\t"
1588  PTR_ADDU "%[addr0], %[addr0], %[pix] \n\t"
        /* ftmp3 = q0, ftmp1 = p1, ftmp2 = p0, ftmp4 = q1 */
1589  MMI_LDC1(%[ftmp3], %[pix], 0x00)
1590  MMI_LDXC1(%[ftmp1], %[addr0], %[addr2], 0x00)
1591  MMI_LDXC1(%[ftmp2], %[addr0], %[addr1], 0x00)
1592  MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
        /* Broadcast alpha-1 into the bytes of ftmp5 and beta-1 into ftmp6. */
1593  "mtc1 %[alpha], %[ftmp5] \n\t"
1594  "mtc1 %[beta], %[ftmp6] \n\t"
1595  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1596  "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1597  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
        /* ftmp8 collects, via saturating subtraction, how far |q0-p0| exceeds
         * alpha-1 and how far |p1-p0| and |q1-q0| exceed beta-1. */
1598  "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t"
1599  "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1600  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1601  "or %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
        /* Spill the alpha-1 bytes; ftmp5 is reused as a temporary below. */
1602  MMI_SDC1(%[ftmp5], %[stack], 0x10)
1603  "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1604  "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
1605  "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1606  "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1607  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1608  "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1609  "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1610  "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t"
1611  "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1612  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1613  "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1614  "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1615  MMI_LDC1(%[ftmp5], %[stack], 0x10)
        /* stack+0x20 = base mask: 0xff in every byte where none of the three
         * differences exceeded its threshold, 0 elsewhere. */
1616  "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1617  "ldc1 %[ftmp10], %[ff_pb_1] \n\t"
1618  MMI_SDC1(%[ftmp8], %[stack], 0x20)
        /* ftmp7 = base mask limited to bytes where |p0 - q0| also stays within
         * a smaller bound derived from the alpha-1 bytes by two pavgb steps
         * (with 0, then with ff_pb_1). */
1619  "pavgb %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1620  "psubusb %[ftmp8], %[ftmp3], %[ftmp2] \n\t"
1621  "pavgb %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
1622  "psubusb %[ftmp7], %[ftmp2], %[ftmp3] \n\t"
1623  "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1624  "psubusb %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1625  MMI_LDC1(%[ftmp15], %[stack], 0x20)
1626  "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1627  "and %[ftmp7], %[ftmp7], %[ftmp15] \n\t"
        /* ftmp15 = p2; stack+0x30 = ftmp7 limited to |p2 - p0| <= beta-1. */
1628  MMI_LDXC1(%[ftmp15], %[addr0], %[stride], 0x00)
1629  "psubusb %[ftmp8], %[ftmp15], %[ftmp2] \n\t"
1630  "psubusb %[ftmp5], %[ftmp2], %[ftmp15] \n\t"
1631  "psubusb %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1632  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1633  "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
1634  "and %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
        /* ftmp14 = q2; stack+0x40 = ftmp7 limited to |q2 - q0| <= beta-1. */
1635  MMI_LDXC1(%[ftmp14], %[pix], %[addr2], 0x00)
1636  MMI_SDC1(%[ftmp5], %[stack], 0x30)
1637  "psubusb %[ftmp8], %[ftmp14], %[ftmp3] \n\t"
1638  "psubusb %[ftmp5], %[ftmp3], %[ftmp14] \n\t"
1639  "psubusb %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1640  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1641  "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
1642  "and %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1643  MMI_SDC1(%[ftmp5], %[stack], 0x40)
        /* ---- p side: new p0, p1, p2 ----
         * stack+0x10 and stack+0x00 hold intermediate averages / byte sums. */
1644  "pavgb %[ftmp5], %[ftmp15], %[ftmp1] \n\t"
1645  "pavgb %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1646  "pavgb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1647  MMI_SDC1(%[ftmp6], %[stack], 0x10)
1648  "paddb %[ftmp7], %[ftmp15], %[ftmp1] \n\t"
1649  "paddb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1650  "paddb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1651  "mov.d %[ftmp8], %[ftmp7] \n\t"
1652  MMI_SDC1(%[ftmp7], %[stack], 0x00)
1653  "psrlh %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
1654  "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1655  "xor %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1656  "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1657  "psubb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1658  "pavgb %[ftmp6], %[ftmp15], %[ftmp4] \n\t"
1659  "psubb %[ftmp7], %[ftmp15], %[ftmp4] \n\t"
1660  "paddb %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
1661  "psubb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1662  "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1663  "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1664  MMI_LDC1(%[ftmp13], %[stack], 0x10)
1665  "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1666  "psrlh %[ftmp8], %[ftmp8], %[ftmp11] \n\t"
1667  "pavgb %[ftmp6], %[ftmp6], %[ftmp13] \n\t"
1668  "pavgb %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
1669  "xor %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1670  "and %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1671  "psubb %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1672  "xor %[ftmp8], %[ftmp2], %[ftmp4] \n\t"
1673  "pavgb %[ftmp7], %[ftmp2], %[ftmp4] \n\t"
1674  "and %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1675  "psubb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1676  MMI_LDC1(%[ftmp13], %[stack], 0x30)
1677  "pavgb %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
1678  MMI_LDC1(%[ftmp12], %[stack], 0x20)
        /* Per-byte select between the two candidates (ftmp6, ftmp7) and the
         * original p0 using the masks from stack+0x30 and stack+0x20, then
         * store the result as the new p0. */
1679  "xor %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1680  "xor %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
1681  "and %[ftmp6], %[ftmp6], %[ftmp13] \n\t"
1682  "and %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
1683  "xor %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1684  "xor %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1685  MMI_SDXC1(%[ftmp6], %[addr0], %[addr1], 0x00)
        /* ftmp6 = p3 */
1686  MMI_LDC1(%[ftmp6], %[addr0], 0x00)
1687  "paddb %[ftmp7], %[ftmp15], %[ftmp6] \n\t"
1688  "pavgb %[ftmp6], %[ftmp6], %[ftmp15] \n\t"
1689  MMI_LDC1(%[ftmp12], %[stack], 0x00)
1690  "pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1691  "paddb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1692  "paddb %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
1693  "psrlh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
1694  "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1695  "xor %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1696  "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1697  MMI_LDC1(%[ftmp12], %[stack], 0x30)
1698  "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
        /* Keep the original p1 / p2 wherever the stack+0x30 mask is clear,
         * then store new p1 (ftmp5) and new p2 (ftmp6). */
1699  "xor %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
1700  "xor %[ftmp6], %[ftmp6], %[ftmp15] \n\t"
1701  "and %[ftmp5], %[ftmp5], %[ftmp12] \n\t"
1702  "and %[ftmp6], %[ftmp6], %[ftmp12] \n\t"
1703  "xor %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
1704  "xor %[ftmp6], %[ftmp6], %[ftmp15] \n\t"
1705  MMI_SDXC1(%[ftmp5], %[addr0], %[addr2], 0x00)
1706  MMI_SDXC1(%[ftmp6], %[addr0], %[stride], 0x00)
        /* ---- q side: same sequence with the roles of p and q swapped and
         * the stack+0x40 mask in place of stack+0x30 ---- */
1707  "pavgb %[ftmp5], %[ftmp14], %[ftmp4] \n\t"
1708  "pavgb %[ftmp6], %[ftmp3], %[ftmp2] \n\t"
1709  "pavgb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1710  MMI_SDC1(%[ftmp6], %[stack], 0x10)
1711  "paddb %[ftmp7], %[ftmp14], %[ftmp4] \n\t"
1712  "paddb %[ftmp8], %[ftmp3], %[ftmp2] \n\t"
1713  "paddb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1714  "mov.d %[ftmp8], %[ftmp7] \n\t"
1715  MMI_SDC1(%[ftmp7], %[stack], 0x00)
1716  "psrlh %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
1717  "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1718  "xor %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1719  "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1720  "psubb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1721  "pavgb %[ftmp6], %[ftmp14], %[ftmp1] \n\t"
1722  "paddb %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
1723  "psubb %[ftmp7], %[ftmp14], %[ftmp1] \n\t"
1724  "psubb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1725  "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1726  "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1727  MMI_LDC1(%[ftmp12], %[stack], 0x10)
1728  "pavgb %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1729  "pavgb %[ftmp6], %[ftmp6], %[ftmp12] \n\t"
1730  "psrlh %[ftmp8], %[ftmp8], %[ftmp11] \n\t"
1731  "pavgb %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
1732  "xor %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1733  "and %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1734  "psubb %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1735  "xor %[ftmp8], %[ftmp3], %[ftmp1] \n\t"
1736  "pavgb %[ftmp7], %[ftmp3], %[ftmp1] \n\t"
1737  "and %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1738  MMI_LDC1(%[ftmp12], %[stack], 0x40)
1739  "psubb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1740  MMI_LDC1(%[ftmp13], %[stack], 0x20)
1741  "pavgb %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
1742  "xor %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1743  "xor %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1744  "and %[ftmp6], %[ftmp6], %[ftmp12] \n\t"
1745  "and %[ftmp7], %[ftmp7], %[ftmp13] \n\t"
1746  "xor %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1747  "xor %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
        /* Store new q0, then load q3 for the q1 / q2 computation. */
1748  MMI_SDC1(%[ftmp6], %[pix], 0x00)
1749  MMI_LDXC1(%[ftmp6], %[pix], %[addr1], 0x00)
1750  "paddb %[ftmp7], %[ftmp14], %[ftmp6] \n\t"
1751  "pavgb %[ftmp6], %[ftmp6], %[ftmp14] \n\t"
1752  MMI_LDC1(%[ftmp12], %[stack], 0x00)
1753  "pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1754  "paddb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1755  "paddb %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
1756  "psrlh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
1757  "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1758  "xor %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1759  "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1760  MMI_LDC1(%[ftmp12], %[stack], 0x40)
1761  "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1762  "xor %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1763  "xor %[ftmp6], %[ftmp6], %[ftmp14] \n\t"
1764  "and %[ftmp5], %[ftmp5], %[ftmp12] \n\t"
1765  "and %[ftmp6], %[ftmp6], %[ftmp12] \n\t"
1766  "xor %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1767  "xor %[ftmp6], %[ftmp6], %[ftmp14] \n\t"
        /* Store new q1 (ftmp5) and new q2 (ftmp6). */
1768  MMI_SDXC1(%[ftmp5], %[pix], %[stride], 0x00)
1769  MMI_SDXC1(%[ftmp6], %[pix], %[addr2], 0x00)
        /* Early-exit target for the two bltz tests above. */
1770  "1: \n\t"
1771  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1772  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1773  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1774  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1775  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1776  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
1777  [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
1778  [ftmp14]"=&f"(ftmp[14]), [ftmp15]"=&f"(ftmp[15]),
1779  [tmp0]"=&r"(tmp[0]),
1780  RESTRICT_ASM_ALL64
1781  RESTRICT_ASM_ADDRT
1782  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
1783  [addr2]"=&r"(addr[2]),
        /* alpha and beta are read-write operands because the asm decrements them. */
1784  [alpha]"+&r"(alpha), [beta]"+&r"(beta)
1785  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
1786  [stack]"r"(stack), [ff_pb_1]"m"(ff_pb_1)
1787  : "memory"
1788  );
1789 }
1790 
1791 void ff_deblock_v_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
1792  int beta, int8_t *tc0)
1793 {
1794  double ftmp[9];
1795  mips_reg addr[1];
1796  DECLARE_VAR_LOW32;
1797  DECLARE_VAR_ALL64;
1798  DECLARE_VAR_ADDRT;
1799 
1800  __asm__ volatile (
1801  "addi %[alpha], %[alpha], -0x01 \n\t"
1802  "addi %[beta], %[beta], -0x01 \n\t"
1803  "or %[addr0], $0, %[pix] \n\t"
1804  PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
1805  PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
1806  MMI_LDC1(%[ftmp1], %[addr0], 0x00)
1807  MMI_LDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1808  MMI_LDC1(%[ftmp3], %[pix], 0x00)
1809  MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1810 
1811  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1812  "mtc1 %[alpha], %[ftmp5] \n\t"
1813  "mtc1 %[beta], %[ftmp6] \n\t"
1814  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1815  "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1816  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1817  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1818  "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t"
1819  "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1820  "or %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1821  "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1822  "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
1823  "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1824  "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1825  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1826  "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1827  "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1828  "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t"
1829  "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1830  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1831  "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1832  "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1833  "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1834  MMI_ULWC1(%[ftmp7], %[tc0], 0x00)
1835  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1836  "and %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1837  "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1838  "xor %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1839  "xor %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1840  "and %[ftmp6], %[ftmp6], %[ff_pb_1] \n\t"
1841  "pavgb %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
1842  "xor %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
1843  "pavgb %[ftmp4], %[ftmp4], %[ff_pb_3] \n\t"
1844  "pavgb %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
1845  "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1846  "paddusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1847  "psubusb %[ftmp7], %[ff_pb_A1], %[ftmp4] \n\t"
1848  "psubusb %[ftmp4], %[ftmp4], %[ff_pb_A1] \n\t"
1849  "pminub %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1850  "pminub %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
1851  "psubusb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1852  "psubusb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1853  "paddusb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1854  "paddusb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1855 
1856  MMI_SDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1857  MMI_SDC1(%[ftmp3], %[pix], 0x00)
1858  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1859  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1860  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1861  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1862  [ftmp8]"=&f"(ftmp[8]),
1863  RESTRICT_ASM_LOW32
1864  RESTRICT_ASM_ALL64
1865  RESTRICT_ASM_ADDRT
1866  [addr0]"=&r"(addr[0])
1867  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
1868  [alpha]"r"(alpha), [beta]"r"(beta),
1869  [tc0]"r"(tc0), [ff_pb_1]"f"(ff_pb_1),
1870  [ff_pb_3]"f"(ff_pb_3), [ff_pb_A1]"f"(ff_pb_A1)
1871  : "memory"
1872  );
1873 }
1874 
1876  int beta)
1877 {
1878  double ftmp[9];
1879  mips_reg addr[1];
1880  DECLARE_VAR_ALL64;
1881  DECLARE_VAR_ADDRT;
1882 
1883  __asm__ volatile (
1884  "addi %[alpha], %[alpha], -0x01 \n\t"
1885  "addi %[beta], %[beta], -0x01 \n\t"
1886  "or %[addr0], $0, %[pix] \n\t"
1887  PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
1888  PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
1889  MMI_LDC1(%[ftmp1], %[addr0], 0x00)
1890  MMI_LDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1891  MMI_LDC1(%[ftmp3], %[pix], 0x00)
1892  MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1893 
1894  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1895  "mtc1 %[alpha], %[ftmp5] \n\t"
1896  "mtc1 %[beta], %[ftmp6] \n\t"
1897  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1898  "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1899  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1900  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1901  "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t"
1902  "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1903  "or %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1904  "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1905  "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
1906  "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1907  "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1908  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1909  "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1910  "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1911  "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t"
1912  "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1913  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1914  "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1915  "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1916  "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1917  "mov.d %[ftmp6], %[ftmp2] \n\t"
1918  "mov.d %[ftmp7], %[ftmp3] \n\t"
1919  "xor %[ftmp5], %[ftmp2], %[ftmp4] \n\t"
1920  "and %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t"
1921  "pavgb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1922  "psubusb %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
1923  "pavgb %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1924  "xor %[ftmp5], %[ftmp3], %[ftmp1] \n\t"
1925  "and %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t"
1926  "pavgb %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
1927  "psubusb %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1928  "pavgb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1929  "psubb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1930  "psubb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1931  "and %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
1932  "and %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
1933  "paddb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1934  "paddb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1935 
1936  MMI_SDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1937  MMI_SDC1(%[ftmp3], %[pix], 0x00)
1938  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1939  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1940  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1941  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1942  [ftmp8]"=&f"(ftmp[8]),
1943  RESTRICT_ASM_ALL64
1944  RESTRICT_ASM_ADDRT
1945  [addr0]"=&r"(addr[0])
1946  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
1947  [alpha]"r"(alpha), [beta]"r"(beta),
1948  [ff_pb_1]"f"(ff_pb_1)
1949  : "memory"
1950  );
1951 }
1952 
1953 void ff_deblock_h_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
1954  int8_t *tc0)
1955 {
1956  double ftmp[11];
1957  mips_reg addr[6];
1958  DECLARE_VAR_LOW32;
1959 
1960  __asm__ volatile (
1961  "addi %[alpha], %[alpha], -0x01 \n\t"
1962  "addi %[beta], %[beta], -0x01 \n\t"
1963  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
1964  PTR_ADDI "%[pix], %[pix], -0x02 \n\t"
1965  PTR_ADDU "%[addr1], %[addr0], %[stride] \n\t"
1966  PTR_ADDU "%[addr2], %[addr0], %[addr0] \n\t"
1967  "or %[addr5], $0, %[pix] \n\t"
1968  PTR_ADDU "%[pix], %[pix], %[addr1] \n\t"
1969  MMI_ULWC1(%[ftmp0], %[addr5], 0x00)
1970  PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t"
1971  MMI_ULWC1(%[ftmp2], %[addr3], 0x00)
1972  PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t"
1973  MMI_ULWC1(%[ftmp1], %[addr4], 0x00)
1974  MMI_ULWC1(%[ftmp3], %[pix], 0x00)
1975  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
1976  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1977  PTR_ADDU "%[addr3], %[pix], %[stride] \n\t"
1978  "punpckhhw %[ftmp2], %[ftmp0], %[ftmp1] \n\t"
1979  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1980  MMI_ULWC1(%[ftmp4], %[addr3], 0x00)
1981  PTR_ADDU "%[addr4], %[pix], %[addr0] \n\t"
1982  MMI_ULWC1(%[ftmp6], %[addr4], 0x00)
1983  PTR_ADDU "%[addr3], %[pix], %[addr1] \n\t"
1984  MMI_ULWC1(%[ftmp5], %[addr3], 0x00)
1985  PTR_ADDU "%[addr4], %[pix], %[addr2] \n\t"
1986  MMI_ULWC1(%[ftmp7], %[addr4], 0x00)
1987  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1988  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1989  "mov.d %[ftmp6], %[ftmp4] \n\t"
1990  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1991  "punpckhhw %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1992  "punpckhwd %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
1993  "punpckhwd %[ftmp3], %[ftmp2], %[ftmp6] \n\t"
1994  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
1995  "punpcklwd %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1996  "mov.d %[ftmp9], %[ftmp0] \n\t"
1997  "mov.d %[ftmp10], %[ftmp3] \n\t"
1998 
1999  "xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
2000  "mtc1 %[alpha], %[ftmp4] \n\t"
2001  "mtc1 %[beta], %[ftmp5] \n\t"
2002  "pshufh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
2003  "pshufh %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
2004  "packushb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2005  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2006  "psubusb %[ftmp6], %[ftmp2], %[ftmp1] \n\t"
2007  "psubusb %[ftmp7], %[ftmp1], %[ftmp2] \n\t"
2008  "or %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
2009  "psubusb %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2010  "psubusb %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
2011  "psubusb %[ftmp4], %[ftmp0], %[ftmp1] \n\t"
2012  "or %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2013  "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2014  "or %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2015  "psubusb %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
2016  "psubusb %[ftmp4], %[ftmp3], %[ftmp2] \n\t"
2017  "or %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2018  "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2019  "or %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2020  "xor %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
2021  "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
2022  MMI_ULWC1(%[ftmp6], %[tc0], 0x00)
2023  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
2024  "and %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
2025  "pcmpeqb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2026  "xor %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
2027  "xor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
2028  "and %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t"
2029  "pavgb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
2030  "xor %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
2031  "pavgb %[ftmp3], %[ftmp3], %[ff_pb_3] \n\t"
2032  "pavgb %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
2033  "pavgb %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
2034  "paddusb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
2035  "psubusb %[ftmp6], %[ff_pb_A1], %[ftmp3] \n\t"
2036  "psubusb %[ftmp3], %[ftmp3], %[ff_pb_A1] \n\t"
2037  "pminub %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
2038  "pminub %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
2039  "psubusb %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
2040  "psubusb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2041  "paddusb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
2042  "paddusb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2043 
2044  "punpckhwd %[ftmp4], %[ftmp9], %[ftmp9] \n\t"
2045  "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t"
2046  "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t"
2047  "punpcklbh %[ftmp0], %[ftmp9], %[ftmp1] \n\t"
2048  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
2049  "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2050  "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2051  MMI_USWC1(%[ftmp1], %[addr5], 0x00)
2052  PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t"
2053  "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
2054  MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2055  PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t"
2056  MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2057  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2058  "punpckhwd %[ftmp3], %[ftmp10], %[ftmp10] \n\t"
2059  MMI_USWC1(%[ftmp0], %[pix], 0x00)
2060  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2061  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
2062  PTR_ADDU "%[addr3], %[pix], %[stride] \n\t"
2063  "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t"
2064  "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2065  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2066  "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2067  PTR_ADDU "%[addr3], %[pix], %[addr0] \n\t"
2068  PTR_ADDU "%[addr4], %[pix], %[addr1] \n\t"
2069  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2070  MMI_USWC1(%[ftmp4], %[addr4], 0x00)
2071  PTR_ADDU "%[addr3], %[pix], %[addr2] \n\t"
2072  "punpckhwd %[ftmp9], %[ftmp4], %[ftmp4] \n\t"
2073  MMI_USWC1(%[ftmp9], %[addr3], 0x00)
2074  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2075  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2076  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2077  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2078  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
2079  [ftmp10]"=&f"(ftmp[10]),
2080  RESTRICT_ASM_LOW32
2081  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2082  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2083  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2084  [pix]"+&r"(pix)
2085  : [alpha]"r"(alpha), [beta]"r"(beta),
2086  [stride]"r"((mips_reg)stride), [tc0]"r"(tc0),
2087  [ff_pb_1]"f"(ff_pb_1), [ff_pb_3]"f"(ff_pb_3),
2088  [ff_pb_A1]"f"(ff_pb_A1)
2089  : "memory"
2090  );
2091 }
2092 
2094  int beta)
2095 {
2096  double ftmp[11];
2097  mips_reg addr[6];
2098  DECLARE_VAR_LOW32;
2099 
2100  __asm__ volatile (
2101  "addi %[alpha], %[alpha], -0x01 \n\t"
2102  "addi %[beta], %[beta], -0x01 \n\t"
2103  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2104  PTR_ADDI "%[pix], %[pix], -0x02 \n\t"
2105  PTR_ADDU "%[addr1], %[addr0], %[stride] \n\t"
2106  PTR_ADDU "%[addr2], %[addr0], %[addr0] \n\t"
2107  "or %[addr5], $0, %[pix] \n\t"
2108  PTR_ADDU "%[pix], %[pix], %[addr1] \n\t"
2109  MMI_ULWC1(%[ftmp0], %[addr5], 0x00)
2110  PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t"
2111  MMI_ULWC1(%[ftmp2], %[addr3], 0x00)
2112  PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t"
2113  MMI_ULWC1(%[ftmp1], %[addr4], 0x00)
2114  MMI_ULWC1(%[ftmp3], %[pix], 0x00)
2115  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2116  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
2117  PTR_ADDU "%[addr3], %[pix], %[stride] \n\t"
2118  "punpckhhw %[ftmp2], %[ftmp0], %[ftmp1] \n\t"
2119  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2120  MMI_ULWC1(%[ftmp4], %[addr3], 0x00)
2121  PTR_ADDU "%[addr4], %[pix], %[addr0] \n\t"
2122  MMI_ULWC1(%[ftmp6], %[addr4], 0x00)
2123  PTR_ADDU "%[addr3], %[pix], %[addr1] \n\t"
2124  MMI_ULWC1(%[ftmp5], %[addr3], 0x00)
2125  PTR_ADDU "%[addr4], %[pix], %[addr2] \n\t"
2126  MMI_ULWC1(%[ftmp7], %[addr4], 0x00)
2127  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2128  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
2129  "mov.d %[ftmp6], %[ftmp4] \n\t"
2130  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2131  "punpckhhw %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
2132  "punpckhwd %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
2133  "punpckhwd %[ftmp3], %[ftmp2], %[ftmp6] \n\t"
2134  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2135  "punpcklwd %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2136 
2137  "xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
2138  "mtc1 %[alpha], %[ftmp4] \n\t"
2139  "mtc1 %[beta], %[ftmp5] \n\t"
2140  "pshufh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
2141  "pshufh %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
2142  "packushb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2143  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2144  "psubusb %[ftmp6], %[ftmp2], %[ftmp1] \n\t"
2145  "psubusb %[ftmp7], %[ftmp1], %[ftmp2] \n\t"
2146  "or %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
2147  "psubusb %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2148  "psubusb %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
2149  "psubusb %[ftmp4], %[ftmp0], %[ftmp1] \n\t"
2150  "or %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2151  "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2152  "or %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2153  "psubusb %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
2154  "psubusb %[ftmp4], %[ftmp3], %[ftmp2] \n\t"
2155  "or %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2156  "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2157  "or %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2158  "xor %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
2159  "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
2160  "mov.d %[ftmp5], %[ftmp1] \n\t"
2161  "mov.d %[ftmp6], %[ftmp2] \n\t"
2162  "xor %[ftmp4], %[ftmp1], %[ftmp3] \n\t"
2163  "and %[ftmp4], %[ftmp4], %[ff_pb_1] \n\t"
2164  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
2165  "psubusb %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
2166  "pavgb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
2167  "xor %[ftmp4], %[ftmp2], %[ftmp0] \n\t"
2168  "and %[ftmp4], %[ftmp4], %[ff_pb_1] \n\t"
2169  "pavgb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
2170  "psubusb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
2171  "pavgb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2172  "psubb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
2173  "psubb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2174  "and %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
2175  "and %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
2176  "paddb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
2177  "paddb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2178 
2179  "punpckhwd %[ftmp4], %[ftmp0], %[ftmp0] \n\t"
2180  "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t"
2181  "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t"
2182  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2183  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2184  "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2185  "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2186  MMI_USWC1(%[ftmp1], %[addr5], 0x00)
2187  PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t"
2188  "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
2189  PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t"
2190  MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2191  MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2192  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2193  "punpckhwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
2194  MMI_USWC1(%[ftmp0], %[pix], 0x00)
2195  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2196  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
2197  PTR_ADDU "%[addr3], %[pix], %[stride] \n\t"
2198  "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t"
2199  "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2200  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2201  "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2202  PTR_ADDU "%[addr3], %[pix], %[addr0] \n\t"
2203  PTR_ADDU "%[addr4], %[pix], %[addr1] \n\t"
2204  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2205  PTR_ADDU "%[addr3], %[pix], %[addr2] \n\t"
2206  MMI_USWC1(%[ftmp4], %[addr4], 0x00)
2207  "punpckhwd %[ftmp9], %[ftmp4], %[ftmp4] \n\t"
2208  MMI_USWC1(%[ftmp9], %[addr3], 0x00)
2209  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2210  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2211  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2212  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2213  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
2214  [ftmp10]"=&f"(ftmp[10]),
2215  RESTRICT_ASM_LOW32
2216  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2217  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2218  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2219  [pix]"+&r"(pix)
2220  : [alpha]"r"(alpha), [beta]"r"(beta),
2221  [stride]"r"((mips_reg)stride), [ff_pb_1]"f"(ff_pb_1)
2222  : "memory"
2223  );
2224 }
2225 
/**
 * Deblock a 16-pixel-wide luma segment as two independent 8-pixel halves.
 *
 * Each half owns two consecutive tc0 entries.  A half is handed to
 * ff_deblock_v8_luma_8_mmi() unless both of its entries are negative (the
 * bitwise AND of the two is negative only in that case).
 *
 * @param pix    first pixel of the left half
 * @param stride distance in bytes between consecutive rows
 * @param alpha  threshold forwarded unchanged to the 8-pixel filter
 * @param beta   threshold forwarded unchanged to the 8-pixel filter
 * @param tc0    four clip values, two per half
 */
void ff_deblock_v_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
        int8_t *tc0)
{
    int half;

    for (half = 0; half < 2; half++) {
        int8_t *tc = tc0 + 2 * half;

        /* Sign bit set in both entries: nothing to filter in this half. */
        if ((tc[0] & tc[1]) < 0)
            continue;

        ff_deblock_v8_luma_8_mmi(pix + 8 * half, stride, alpha, beta, tc);
    }
}
2234 
/**
 * Intra-mode deblocking of a 16-pixel-wide luma segment, done as two
 * 8-pixel-wide calls to deblock_v8_luma_intra_8_mmi() (at pix and pix + 8).
 *
 * NOTE(review): the first line of this definition was missing from the
 * listing; the name and leading parameters were restored from the FFmpeg
 * prototype -- confirm against h264dsp_mips.h.
 *
 * @param pix    first pixel of the left half
 * @param stride distance in bytes between consecutive rows
 * @param alpha  threshold forwarded unchanged to the 8-pixel filter
 * @param beta   threshold forwarded unchanged to the 8-pixel filter
 */
void ff_deblock_v_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
        int beta)
{
    deblock_v8_luma_intra_8_mmi(pix + 0, stride, alpha, beta);
    deblock_v8_luma_intra_8_mmi(pix + 8, stride, alpha, beta);
}
2241 
2242 void ff_deblock_h_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
2243  int8_t *tc0)
2244 {
2245  DECLARE_ALIGNED(8, const uint64_t, stack[0x0d]);
2246  double ftmp[9];
2247  mips_reg addr[8];
2248  DECLARE_VAR_LOW32;
2249  DECLARE_VAR_ALL64;
2250 
2251  __asm__ volatile (
2252  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2253  PTR_ADDI "%[addr1], %[pix], -0x4 \n\t"
2254  PTR_ADDU "%[addr2], %[stride], %[addr0] \n\t"
2255  MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2256  PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t"
2257  PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t"
2258  MMI_ULDC1(%[ftmp1], %[addr3], 0x00)
2259  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2260  MMI_ULDC1(%[ftmp2], %[addr5], 0x00)
2261  MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2262  PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t"
2263  MMI_ULDC1(%[ftmp4], %[addr3], 0x00)
2264  PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t"
2265  MMI_ULDC1(%[ftmp5], %[addr5], 0x00)
2266  PTR_ADDU "%[addr3], %[addr4], %[addr2] \n\t"
2267  MMI_ULDC1(%[ftmp6], %[addr3], 0x00)
2268  PTR_ADDU "%[addr6], %[addr0], %[addr0] \n\t"
2269  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2270  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2271  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2272  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2273  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2274  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2275  PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t"
2276  MMI_SDC1(%[ftmp1], %[stack], 0x10)
2277  MMI_ULDC1(%[ftmp8], %[addr3], 0x00)
2278  PTR_ADDU "%[addr7], %[addr6], %[addr6] \n\t"
2279  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2280  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
2281  "punpckhhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2282  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2283  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2284  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2285  MMI_LDC1(%[ftmp8], %[stack], 0x10)
2286  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2287  MMI_SDC1(%[ftmp0], %[stack], 0x00)
2288  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp8] \n\t"
2289  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
2290  "punpckhhw %[ftmp0], %[ftmp3], %[ftmp5] \n\t"
2291  "punpcklhw %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
2292  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
2293  "punpckhwd %[ftmp5], %[ftmp7], %[ftmp3] \n\t"
2294  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
2295  "punpckhwd %[ftmp3], %[ftmp1], %[ftmp2] \n\t"
2296  "punpcklwd %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
2297  MMI_SDC1(%[ftmp1], %[stack], 0x10)
2298  MMI_SDC1(%[ftmp3], %[stack], 0x20)
2299  MMI_SDC1(%[ftmp7], %[stack], 0x30)
2300  MMI_SDC1(%[ftmp5], %[stack], 0x40)
2301  MMI_SDC1(%[ftmp6], %[stack], 0x50)
2302  PTR_ADDU "%[addr1], %[addr1], %[addr7] \n\t"
2303  PTR_ADDU "%[addr4], %[addr4], %[addr7] \n\t"
2304  MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2305  PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t"
2306  MMI_ULDC1(%[ftmp1], %[addr3], 0x00)
2307  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2308  MMI_ULDC1(%[ftmp2], %[addr5], 0x00)
2309  MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2310  PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t"
2311  MMI_ULDC1(%[ftmp4], %[addr3], 0x00)
2312  PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t"
2313  MMI_ULDC1(%[ftmp5], %[addr5], 0x00)
2314  PTR_ADDU "%[addr3], %[addr4], %[addr2] \n\t"
2315  MMI_ULDC1(%[ftmp6], %[addr3], 0x00)
2316  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2317  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2318  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2319  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2320  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2321  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2322  PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t"
2323  MMI_SDC1(%[ftmp1], %[stack], 0x18)
2324  MMI_ULDC1(%[ftmp8], %[addr3], 0x00)
2325  "punpckhhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2326  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2327  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
2328  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2329  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2330  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2331  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2332  MMI_LDC1(%[ftmp8], %[stack], 0x18)
2333  MMI_SDC1(%[ftmp0], %[stack], 0x08)
2334  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp8] \n\t"
2335  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
2336  "punpckhhw %[ftmp0], %[ftmp3], %[ftmp5] \n\t"
2337  "punpcklhw %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
2338  "punpckhwd %[ftmp5], %[ftmp7], %[ftmp3] \n\t"
2339  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
2340  "punpckhwd %[ftmp3], %[ftmp1], %[ftmp2] \n\t"
2341  "punpcklwd %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
2342  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
2343  MMI_SDC1(%[ftmp1], %[stack], 0x18)
2344  MMI_SDC1(%[ftmp3], %[stack], 0x28)
2345  MMI_SDC1(%[ftmp7], %[stack], 0x38)
2346  MMI_SDC1(%[ftmp5], %[stack], 0x48)
2347  MMI_SDC1(%[ftmp6], %[stack], 0x58)
2348  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2349  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2350  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2351  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2352  [ftmp8]"=&f"(ftmp[8]),
2353  RESTRICT_ASM_ALL64
2354  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2355  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2356  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2357  [addr6]"=&r"(addr[6]), [addr7]"=&r"(addr[7])
2358  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
2359  [stack]"r"(stack)
2360  : "memory"
2361  );
2362 
2363  ff_deblock_v_luma_8_mmi((uint8_t *) &stack[6], 0x10, alpha, beta, tc0);
2364 
2365  __asm__ volatile (
2366  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2367  PTR_ADDI "%[addr1], %[pix], -0x02 \n\t"
2368  PTR_ADDU "%[addr6], %[addr0], %[addr0] \n\t"
2369  PTR_ADDU "%[addr2], %[addr0], %[stride] \n\t"
2370  PTR_ADDU "%[addr7], %[addr6], %[addr6] \n\t"
2371  PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t"
2372  MMI_LDC1(%[ftmp0], %[stack], 0x10)
2373  MMI_LDC1(%[ftmp1], %[stack], 0x20)
2374  MMI_LDC1(%[ftmp2], %[stack], 0x30)
2375  MMI_LDC1(%[ftmp3], %[stack], 0x40)
2376  "punpckhwd %[ftmp4], %[ftmp0], %[ftmp0] \n\t"
2377  "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t"
2378  "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t"
2379  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2380  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2381  "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2382  "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2383  MMI_USWC1(%[ftmp1], %[addr1], 0x00)
2384  PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t"
2385  "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
2386  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2387  MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2388  MMI_USWC1(%[ftmp0], %[addr5], 0x00)
2389  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2390  "punpckhwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
2391  MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2392  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2393  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
2394  "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t"
2395  PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t"
2396  "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2397  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2398  PTR_ADDU "%[addr3], %[addr4], %[addr0] \n\t"
2399  "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2400  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2401  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2402  MMI_USWC1(%[ftmp4], %[addr5], 0x00)
2403  PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t"
2404  "punpckhwd %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2405  PTR_ADDU "%[addr1], %[addr1], %[addr7] \n\t"
2406  MMI_USWC1(%[ftmp4], %[addr3], 0x00)
2407  PTR_ADDU "%[addr4], %[addr4], %[addr7] \n\t"
2408  MMI_LDC1(%[ftmp0], %[stack], 0x18)
2409  MMI_LDC1(%[ftmp1], %[stack], 0x28)
2410  MMI_LDC1(%[ftmp2], %[stack], 0x38)
2411  MMI_LDC1(%[ftmp3], %[stack], 0x48)
2412  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2413  "punpckhwd %[ftmp4], %[ftmp0], %[ftmp0] \n\t"
2414  PTR_ADDU "%[addr6], %[addr0], %[addr0] \n\t"
2415  "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t"
2416  "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t"
2417  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2418  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2419  PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t"
2420  "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2421  "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2422  MMI_USWC1(%[ftmp1], %[addr1], 0x00)
2423  "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
2424  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2425  MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2426  MMI_USWC1(%[ftmp0], %[addr5], 0x00)
2427  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2428  "punpckhwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
2429  MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2430  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2431  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
2432  PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t"
2433  "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t"
2434  "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2435  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2436  PTR_ADDU "%[addr3], %[addr4], %[addr0] \n\t"
2437  "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2438  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2439  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2440  MMI_USWC1(%[ftmp4], %[addr5], 0x00)
2441  PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t"
2442  "punpckhwd %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2443  MMI_USWC1(%[ftmp4], %[addr3], 0x00)
2444  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2445  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2446  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2447  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2448  [ftmp8]"=&f"(ftmp[8]),
2449  RESTRICT_ASM_LOW32
2450  RESTRICT_ASM_ALL64
2451  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2452  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2453  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2454  [addr6]"=&r"(addr[6]), [addr7]"=&r"(addr[7])
2455  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
2456  [stack]"r"(stack)
2457  : "memory"
2458  );
2459 }
2460 
2462  int beta)
2463 {
      /*
       * Body of ff_deblock_h_luma_intra_8_mmi(pix, stride, alpha, beta).
       *
       * The work is done in three steps:
       *  1. Read 8 bytes from each of 16 consecutive rows starting at
       *     pix - 4 and interleave them (byte -> halfword -> word unpacks,
       *     i.e. a transpose) into ptmp, laid out as 8 rows of 16 bytes.
       *  2. Run ff_deblock_v_luma_intra_8_mmi() on ptmp with a stride of
       *     0x10, starting at the fifth 16-byte row (&ptmp[8]).
       *  3. Interleave ptmp back the same way and write 8 bytes to each of
       *     the 16 rows at pix - 4.
       *
       * ptmp and pdat are scratch buffers that both asm blocks store into,
       * and ptmp is additionally modified by the vertical filter, so they
       * must not be const-qualified: modifying a const object is undefined
       * behavior.
       */
2464  DECLARE_ALIGNED(8, uint64_t, ptmp[0x11]);
2465  DECLARE_ALIGNED(8, uint64_t, pdat[0x04]);
2466  double ftmp[9];
2467  mips_reg addr[7];
2468  DECLARE_VAR_ALL64;
2469 
2470  __asm__ volatile (
      /* addr0 = 2*stride, addr2 = 3*stride, addr3 = 4*stride,
       * addr1 = pix - 4 (row 0), addr4 = addr1 + 3*stride (row 3). */
2471  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2472  PTR_ADDI "%[addr1], %[pix], -0x04 \n\t"
2473  PTR_ADDU "%[addr2], %[addr0], %[stride] \n\t"
2474  PTR_ADDU "%[addr3], %[addr0], %[addr0] \n\t"
2475  PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t"
2476  PTR_ADDU "%[addr5], %[addr1], %[stride] \n\t"
      /* Load 8 bytes from each of rows 0..7 (ftmp0..ftmp6, ftmp8). */
2477  MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2478  PTR_ADDU "%[addr6], %[addr1], %[addr0] \n\t"
2479  MMI_ULDC1(%[ftmp1], %[addr5], 0x00)
2480  MMI_ULDC1(%[ftmp2], %[addr6], 0x00)
2481  PTR_ADDU "%[addr5], %[addr4], %[stride] \n\t"
2482  MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2483  PTR_ADDU "%[addr6], %[addr4], %[addr0] \n\t"
2484  MMI_ULDC1(%[ftmp4], %[addr5], 0x00)
2485  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2486  MMI_ULDC1(%[ftmp5], %[addr6], 0x00)
2487  MMI_ULDC1(%[ftmp6], %[addr5], 0x00)
2488  PTR_ADDU "%[addr5], %[addr4], %[addr3] \n\t"
2489  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2490  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2491  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2492  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2493  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2494  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2495  MMI_ULDC1(%[ftmp8], %[addr5], 0x00)
2496  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2497  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
      /* Not enough registers: ptmp+0x00 and ptmp+0x20 are used as spill
       * slots here and receive their final values further down. */
2498  MMI_SDC1(%[ftmp3], %[ptmp], 0x00)
2499  "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
2500  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2501  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2502  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2503  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
2504  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
2505  MMI_SDC1(%[ftmp2], %[ptmp], 0x20)
2506  MMI_LDC1(%[ftmp2], %[ptmp], 0x00)
2507  "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
2508  "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2509  "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
2510  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2511  "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
2512  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
      /* Rows 0..7 go to the low 8 bytes of each 16-byte ptmp row. */
2513  MMI_SDC1(%[ftmp0], %[ptmp], 0x00)
2514  MMI_SDC1(%[ftmp5], %[ptmp], 0x10)
2515  MMI_SDC1(%[ftmp7], %[ptmp], 0x40)
2516  MMI_SDC1(%[ftmp4], %[ptmp], 0x50)
2517  MMI_LDC1(%[ftmp8], %[ptmp], 0x20)
2518  "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
2519  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2520  "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
2521  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
      /* addr5 = 8*stride: advance both row pointers to rows 8 and 11. */
2522  PTR_ADDU "%[addr5], %[addr3], %[addr3] \n\t"
2523  MMI_SDC1(%[ftmp3], %[ptmp], 0x20)
2524  MMI_SDC1(%[ftmp0], %[ptmp], 0x30)
2525  MMI_SDC1(%[ftmp6], %[ptmp], 0x60)
2526  MMI_SDC1(%[ftmp5], %[ptmp], 0x70)
2527  PTR_ADDU "%[addr1], %[addr1], %[addr5] \n\t"
2528  PTR_ADDU "%[addr4], %[addr4], %[addr5] \n\t"
      /* Same sequence for rows 8..15, stored to the high 8 bytes
       * (offsets 0x08, 0x18, ..., 0x78) of each ptmp row. */
2529  PTR_ADDU "%[addr5], %[addr1], %[stride] \n\t"
2530  MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2531  PTR_ADDU "%[addr6], %[addr1], %[addr0] \n\t"
2532  MMI_ULDC1(%[ftmp1], %[addr5], 0x00)
2533  MMI_ULDC1(%[ftmp2], %[addr6], 0x00)
2534  PTR_ADDU "%[addr5], %[addr4], %[stride] \n\t"
2535  MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2536  PTR_ADDU "%[addr6], %[addr4], %[addr0] \n\t"
2537  MMI_ULDC1(%[ftmp4], %[addr5], 0x00)
2538  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2539  MMI_ULDC1(%[ftmp5], %[addr6], 0x00)
2540  MMI_ULDC1(%[ftmp6], %[addr5], 0x00)
2541  PTR_ADDU "%[addr5], %[addr4], %[addr3] \n\t"
2542  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2543  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2544  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2545  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2546  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2547  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2548  MMI_ULDC1(%[ftmp8], %[addr5], 0x00)
2549  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2550  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
2551  MMI_SDC1(%[ftmp3], %[ptmp], 0x08)
2552  "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
2553  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2554  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2555  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2556  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
2557  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
2558  MMI_SDC1(%[ftmp2], %[ptmp], 0x28)
2559  MMI_LDC1(%[ftmp2], %[ptmp], 0x08)
2560  "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
2561  "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2562  "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
2563  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2564  "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
2565  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
2566  MMI_SDC1(%[ftmp0], %[ptmp], 0x08)
2567  MMI_SDC1(%[ftmp5], %[ptmp], 0x18)
2568  MMI_SDC1(%[ftmp7], %[ptmp], 0x48)
2569  MMI_SDC1(%[ftmp4], %[ptmp], 0x58)
2570  MMI_LDC1(%[ftmp8], %[ptmp], 0x28)
2571  "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
2572  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2573  "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
2574  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
2575  MMI_SDC1(%[ftmp3], %[ptmp], 0x28)
2576  MMI_SDC1(%[ftmp0], %[ptmp], 0x38)
2577  MMI_SDC1(%[ftmp6], %[ptmp], 0x68)
2578  MMI_SDC1(%[ftmp5], %[ptmp], 0x78)
      /* Save the row-8 pointer and the 3*stride, 2*stride and 4*stride
       * offsets in pdat; the write-back asm block below reloads them. */
2579  PTR_S "%[addr1], 0x00(%[pdat]) \n\t"
2580  PTR_S "%[addr2], 0x08(%[pdat]) \n\t"
2581  PTR_S "%[addr0], 0x10(%[pdat]) \n\t"
2582  PTR_S "%[addr3], 0x18(%[pdat]) \n\t"
2583  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2584  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2585  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2586  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2587  [ftmp8]"=&f"(ftmp[8]),
2588  RESTRICT_ASM_ALL64
2589  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2590  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2591  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2592  [addr6]"=&r"(addr[6])
2593  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
2594  [ptmp]"r"(ptmp), [pdat]"r"(pdat)
2595  : "memory"
2596  );
2597 
      /* Filter the interleaved copy in place: &ptmp[8] is 64 bytes (four
       * 16-byte rows) into the buffer and the row stride is 0x10. */
2598  ff_deblock_v_luma_intra_8_mmi((uint8_t *) &ptmp[8], 0x10, alpha, beta);
2599 
2600  __asm__ volatile (
      /* Restore addr1 (row 8), addr2 = 3*stride, addr0 = 2*stride and
       * addr3 = 4*stride saved by the first asm block. */
2601  PTR_L "%[addr1], 0x00(%[pdat]) \n\t"
2602  PTR_L "%[addr2], 0x08(%[pdat]) \n\t"
2603  PTR_L "%[addr0], 0x10(%[pdat]) \n\t"
2604  PTR_L "%[addr3], 0x18(%[pdat]) \n\t"
2605  PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t"
      /* High 8 bytes of each ptmp row -> rows 8..15 of the picture. */
2606  MMI_LDC1(%[ftmp0], %[ptmp], 0x08)
2607  MMI_LDC1(%[ftmp1], %[ptmp], 0x18)
2608  MMI_LDC1(%[ftmp2], %[ptmp], 0x28)
2609  MMI_LDC1(%[ftmp3], %[ptmp], 0x38)
2610  MMI_LDC1(%[ftmp4], %[ptmp], 0x48)
2611  MMI_LDC1(%[ftmp5], %[ptmp], 0x58)
2612  MMI_LDC1(%[ftmp6], %[ptmp], 0x68)
2613  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2614  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2615  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2616  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2617  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2618  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2619  MMI_LDC1(%[ftmp8], %[ptmp], 0x78)
2620  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2621  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
      /* The destination rows double as spill slots: the values parked at
       * addr1 and addr1 + 2*stride are reloaded before those rows get
       * their final contents. */
2622  MMI_USDC1(%[ftmp3], %[addr1], 0x00)
2623  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2624  "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
2625  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2626  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2627  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2628  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
2629  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
2630  MMI_USDC1(%[ftmp2], %[addr5], 0x00)
2631  MMI_ULDC1(%[ftmp2], %[addr1], 0x00)
2632  "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
2633  "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2634  "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
2635  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2636  "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
2637  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
2638  PTR_ADDU "%[addr5], %[addr1], %[stride] \n\t"
2639  MMI_USDC1(%[ftmp0], %[addr1], 0x00)
2640  PTR_ADDU "%[addr6], %[addr4], %[stride] \n\t"
2641  MMI_USDC1(%[ftmp5], %[addr5], 0x00)
2642  PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t"
2643  MMI_USDC1(%[ftmp7], %[addr6], 0x00)
2644  PTR_ADDU "%[addr6], %[addr1], %[addr0] \n\t"
2645  MMI_USDC1(%[ftmp4], %[addr5], 0x00)
2646  MMI_ULDC1(%[ftmp8], %[addr6], 0x00)
2647  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2648  "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
2649  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2650  "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
2651  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
2652  MMI_USDC1(%[ftmp3], %[addr5], 0x00)
2653  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2654  MMI_USDC1(%[ftmp0], %[addr4], 0x00)
2655  PTR_ADDU "%[addr6], %[addr4], %[addr3] \n\t"
2656  MMI_USDC1(%[ftmp6], %[addr5], 0x00)
      /* addr5 = 8*stride: step both row pointers back to rows 0 and 3. */
2657  PTR_ADDU "%[addr5], %[addr3], %[addr3] \n\t"
2658  MMI_USDC1(%[ftmp5], %[addr6], 0x00)
2659  PTR_SUBU "%[addr1], %[addr1], %[addr5] \n\t"
2660  PTR_SUBU "%[addr4], %[addr4], %[addr5] \n\t"
      /* Low 8 bytes of each ptmp row -> rows 0..7 of the picture. */
2661  MMI_LDC1(%[ftmp0], %[ptmp], 0x00)
2662  MMI_LDC1(%[ftmp1], %[ptmp], 0x10)
2663  MMI_LDC1(%[ftmp2], %[ptmp], 0x20)
2664  MMI_LDC1(%[ftmp3], %[ptmp], 0x30)
2665  MMI_LDC1(%[ftmp4], %[ptmp], 0x40)
2666  MMI_LDC1(%[ftmp5], %[ptmp], 0x50)
2667  MMI_LDC1(%[ftmp6], %[ptmp], 0x60)
2668  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2669  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2670  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2671  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2672  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2673  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2674  MMI_LDC1(%[ftmp8], %[ptmp], 0x70)
2675  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2676  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
2677  MMI_USDC1(%[ftmp3], %[addr1], 0x00)
2678  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2679  "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
2680  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2681  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2682  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2683  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
2684  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
2685  MMI_USDC1(%[ftmp2], %[addr5], 0x00)
2686  MMI_ULDC1(%[ftmp2], %[addr1], 0x00)
2687  "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
2688  "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2689  "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
2690  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2691  "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
2692  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
2693  PTR_ADDU "%[addr5], %[addr1], %[stride] \n\t"
2694  MMI_USDC1(%[ftmp0], %[addr1], 0x00)
2695  PTR_ADDU "%[addr6], %[addr4], %[stride] \n\t"
2696  MMI_USDC1(%[ftmp5], %[addr5], 0x00)
2697  PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t"
2698  MMI_USDC1(%[ftmp7], %[addr6], 0x00)
2699  PTR_ADDU "%[addr6], %[addr1], %[addr0] \n\t"
2700  MMI_USDC1(%[ftmp4], %[addr5], 0x00)
2701  MMI_ULDC1(%[ftmp8], %[addr6], 0x00)
2702  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2703  "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
2704  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2705  "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
2706  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
2707  MMI_USDC1(%[ftmp3], %[addr5], 0x00)
2708  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2709  MMI_USDC1(%[ftmp0], %[addr4], 0x00)
2710  PTR_ADDU "%[addr6], %[addr4], %[addr3] \n\t"
2711  MMI_USDC1(%[ftmp6], %[addr5], 0x00)
2712  MMI_USDC1(%[ftmp5], %[addr6], 0x00)
2713  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2714  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2715  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2716  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2717  [ftmp8]"=&f"(ftmp[8]),
2718  RESTRICT_ASM_ALL64
2719  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2720  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2721  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2722  [addr6]"=&r"(addr[6])
2723  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
2724  [ptmp]"r"(ptmp), [pdat]"r"(pdat)
2725  : "memory"
2726  );
2727 }
#define mips_reg
Definition: asmdefs.h:44
void ff_h264_idct_add16_8_mmi(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp_mmi.c:768
void ff_h264_chroma_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
Definition: h264dsp_mmi.c:1119
void ff_deblock_v_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
Definition: h264dsp_mmi.c:2235
else temp
Definition: vf_mcdeint.c:256
#define PTR_SLL
Definition: asmdefs.h:55
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:36
void ff_h264_idct8_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp_mmi.c:683
void ff_deblock_h_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
Definition: h264dsp_mmi.c:2461
void ff_h264_idct_add8_8_mmi(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp_mmi.c:815
void ff_deblock_v_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_mmi.c:2226
The exact code depends on how similar the blocks are and how related they are to the block
uint8_t
void ff_h264_luma_dc_dequant_idct_8_mmi(int16_t *output, int16_t *input, int qmul)
Definition: h264dsp_mmi.c:859
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
#define PTR_ADDI
Definition: asmdefs.h:49
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
Definition: mem.h:117
#define height
const uint64_t ff_pb_A1
Definition: constants.c:60
const uint64_t ff_pw_32
Definition: constants.c:43
void ff_deblock_v8_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_mmi.c:1437
void ff_h264_idct_add8_422_8_mmi(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp_mmi.c:831
#define src
Definition: vp8dsp.c:255
void ff_h264_idct8_add4_8_mmi(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp_mmi.c:798
static const int weights[]
Definition: hevc_pel.c:32
const uint64_t ff_pb_3
Definition: constants.c:58
void ff_h264_idct8_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp_mmi.c:171
void ff_h264_idct_add16intra_8_mmi(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp_mmi.c:785
void ff_h264_weight_pixels4_8_mmi(uint8_t *block, ptrdiff_t stride, int height, int log2_denom, int weight, int offset)
Definition: h264dsp_mmi.c:1355
void ff_h264_chroma422_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
Definition: h264dsp_mmi.c:1086
void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp_mmi.c:79
const uint64_t ff_pw_1
Definition: constants.c:26
void ff_h264_weight_pixels8_8_mmi(uint8_t *block, ptrdiff_t stride, int height, int log2_denom, int weight, int offset)
Definition: h264dsp_mmi.c:1260
void ff_deblock_v_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_mmi.c:1791
#define PTR_SUBU
Definition: asmdefs.h:50
#define b
Definition: input.c:41
void ff_h264_biweight_pixels4_8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, int log2_denom, int weightd, int weights, int offset)
Definition: h264dsp_mmi.c:1393
void ff_h264_biweight_pixels16_8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, int log2_denom, int weightd, int weights, int offset)
Definition: h264dsp_mmi.c:1189
void ff_h264_weight_pixels16_8_mmi(uint8_t *block, ptrdiff_t stride, int height, int log2_denom, int weight, int offset)
Definition: h264dsp_mmi.c:1133
#define src1
Definition: h264pred.c:140
void ff_h264_idct_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp_mmi.c:639
void ff_deblock_h_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_mmi.c:1953
static const int16_t alpha[]
Definition: ilbcdata.h:55
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2]...the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so...,+,-,+,-,+,+,-,+,-,+,...hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32-hcoeff[1]-hcoeff[2]-...a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2}an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||.........intra?||||:Block01:yes no||||:Block02:.................||||:Block03::y DC::ref index:||||:Block04::cb DC::motion x:||||.........:cr DC::motion 
y:||||.................|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------------------------------|||Y subbands||Cb subbands||Cr subbands||||------||------||------|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||------||------||------||||------||------||------|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||------||------||------||||------||------||------|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||------||------||------||||------||------||------|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------------------------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction------------|\Dequantization-------------------\||Reference frames|\IDWT|--------------|Motion\|||Frame 0||Frame 1||Compensation.OBMC v-------|--------------|--------------.\------> Frame n output Frame Frame<----------------------------------/|...|-------------------Range Coder:============Binary Range Coder:-------------------The implemented range coder is an adapted version based upon"Range encoding: an algorithm for removing redundancy from a digitised message."by G.N.N.Martin.The symbols encoded by the Snow range coder are bits(0|1).The associated probabilities are not fix but change depending on the symbol mix seen so far.bit seen|new state---------+-----------------------------------------------0|256-state_transition_table[256-old_state];1|state_transition_table[old_state];state_transition_table={0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 
106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:-------------------------FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector Prediction:=========================1.the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff)*mv_scale Intra DC Prediction block[y][x] dc[1]
Definition: snow.txt:400
static const uint8_t scan8[16 *3+3]
Definition: h264dec.h:651
void ff_deblock_v_chroma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
Definition: h264dsp_mmi.c:1875
#define PTR_ADDIU
Definition: asmdefs.h:48
static void deblock_v8_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
Definition: h264dsp_mmi.c:1565
void ff_deblock_h_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_mmi.c:2242
#define src0
Definition: h264pred.c:139
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
#define PTR_L
Definition: asmdefs.h:51
static int weight(int i, int blen, int offset)
Definition: diracdec.c:1561
const uint64_t ff_pb_1
Definition: constants.c:57
GLint GLenum GLboolean GLsizei stride
Definition: opengl_enc.c:104
__asm__(".macro parse_r var r\n\t""\\var = -1\n\t"_IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31)".iflt \\var\n\t"".error \"Unable to parse register name \\r\"\n\t"".endif\n\t"".endm")
void ff_h264_add_pixels4_8_mmi(uint8_t *dst, int16_t *src, int stride)
Definition: h264dsp_mmi.c:31
void ff_deblock_h_chroma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
Definition: h264dsp_mmi.c:2093
#define PTR_S
Definition: asmdefs.h:52
static int16_t block1[64]
Definition: dct.c:117
#define PTR_ADDU
Definition: asmdefs.h:47
void ff_h264_biweight_pixels8_8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, int log2_denom, int weightd, int weights, int offset)
Definition: h264dsp_mmi.c:1303
#define stride
#define PTR_SRL
Definition: asmdefs.h:54
int i
Definition: input.c:407
static uint8_t tmp[11]
Definition: aes_ctr.c:27