FFmpeg
h264pred_mmi.c
Go to the documentation of this file.
1 /*
2  * Loongson SIMD optimized h264pred
3  *
4  * Copyright (c) 2015 Loongson Technology Corporation Limited
5  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  * Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 #include "h264pred_mips.h"
28 #include "constants.h"
29 
/*
 * Body of ff_pred16x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
 * (name and signature taken from the cross-reference list at the end of this
 * listing; the signature line itself, listing line 30, is missing here).
 *
 * Reads 16 bytes from src - stride (the row directly above the block) and
 * writes them to 16 consecutive rows starting at src.
 * MMI_LDC1 / MMI_SDC1 are project macros; presumably aligned 64-bit
 * load/store through an FP register -- TODO confirm in mmiutils.h.
 */
31 {
32  double ftmp[2];
33  uint64_t tmp[1];
34  DECLARE_VAR_ALL64;
35 
36  __asm__ volatile (
 /* tmp0 = loop counter: 8 iterations, two rows per iteration */
37  "dli %[tmp0], 0x08 \n\t"
 /* ftmp0/ftmp1 = bytes 0..7 and 8..15 of the row above */
38  MMI_LDC1(%[ftmp0], %[srcA], 0x00)
39  MMI_LDC1(%[ftmp1], %[srcA], 0x08)
40 
41  "1: \n\t"
42  MMI_SDC1(%[ftmp0], %[src], 0x00)
43  MMI_SDC1(%[ftmp1], %[src], 0x08)
44  PTR_ADDU "%[src], %[src], %[stride] \n\t"
45  MMI_SDC1(%[ftmp0], %[src], 0x00)
46  MMI_SDC1(%[ftmp1], %[src], 0x08)
47 
48  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
49  PTR_ADDU "%[src], %[src], %[stride] \n\t"
50  "bnez %[tmp0], 1b \n\t"
51  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
52  [tmp0]"=&r"(tmp[0]),
53  RESTRICT_ASM_ALL64
 /* src is read-write: the loop advances it row by row */
54  [src]"+&r"(src)
55  : [stride]"r"((mips_reg)stride), [srcA]"r"((mips_reg)(src-stride))
56  : "memory"
57  );
58 }
59 
/*
 * Body of ff_pred16x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
 * (name and signature taken from the cross-reference list at the end of this
 * listing; the signature line itself is missing here).
 *
 * For each of 16 rows: load the byte immediately left of the row (src - 1),
 * multiply it by ff_pb_1 and write the product over the 16-byte row with
 * swl/swr pairs at offsets 0x00..0x0f.
 * ff_pb_1 is presumably 0x0101010101010101 so the multiply replicates the
 * byte -- TODO confirm in constants.c.
 */
61 {
62  uint64_t tmp[3];
63  mips_reg addr[2];
64 
65  __asm__ volatile (
 /* addr0 walks the left-neighbour column, addr1 walks the output rows */
66  PTR_ADDI "%[addr0], %[src], -0x01 \n\t"
67  PTR_ADDU "%[addr1], %[src], $0 \n\t"
 /* tmp2 = loop counter: 8 iterations, two rows per iteration */
68  "dli %[tmp2], 0x08 \n\t"
69  "1: \n\t"
70  "lbu %[tmp0], 0x00(%[addr0]) \n\t"
71  "dmul %[tmp1], %[tmp0], %[ff_pb_1] \n\t"
72  "swl %[tmp1], 0x07(%[addr1]) \n\t"
73  "swr %[tmp1], 0x00(%[addr1]) \n\t"
74  "swl %[tmp1], 0x0f(%[addr1]) \n\t"
75  "swr %[tmp1], 0x08(%[addr1]) \n\t"
76  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
77  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
 /* second row of this iteration */
78  "lbu %[tmp0], 0x00(%[addr0]) \n\t"
79  "dmul %[tmp1], %[tmp0], %[ff_pb_1] \n\t"
80  "swl %[tmp1], 0x07(%[addr1]) \n\t"
81  "swr %[tmp1], 0x00(%[addr1]) \n\t"
82  "swl %[tmp1], 0x0f(%[addr1]) \n\t"
83  "swr %[tmp1], 0x08(%[addr1]) \n\t"
84  "daddi %[tmp2], %[tmp2], -0x01 \n\t"
85  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
86  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
87  "bnez %[tmp2], 1b \n\t"
88  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
89  [tmp2]"=&r"(tmp[2]),
90  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
91  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride),
92  [ff_pb_1]"r"(ff_pb_1)
93  : "memory"
94  );
95 }
96 
/*
 * Body of ff_pred16x16_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
 * (name and signature taken from the cross-reference list at the end of this
 * listing; the signature line itself is missing here).
 *
 * Sums the 16 bytes of the column left of the block and the 16 bytes of the
 * row above it, computes (sum + 16) >> 5, multiplies the result by ff_pb_1
 * and writes it over all 16 rows of 16 bytes.
 *
 * NOTE(review): addr1 is bound as an output operand but never referenced in
 * the template.
 */
98 {
99  uint64_t tmp[4];
100  mips_reg addr[2];
101 
102  __asm__ volatile (
 /* pass 1: tmp3 = sum of the left column (8 iterations x 2 bytes) */
103  PTR_ADDI "%[addr0], %[src], -0x01 \n\t"
104  "dli %[tmp0], 0x08 \n\t"
105  "xor %[tmp3], %[tmp3], %[tmp3] \n\t"
106  "1: \n\t"
107  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
108  "daddu %[tmp3], %[tmp3], %[tmp1] \n\t"
109  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
110  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
111  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
112  "daddu %[tmp3], %[tmp3], %[tmp1] \n\t"
113  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
114  "bnez %[tmp0], 1b \n\t"
115 
 /* pass 2: add the 16 bytes of the row above (src - stride) */
116  "dli %[tmp0], 0x08 \n\t"
117  PTR_SUBU "%[addr0], %[src], %[stride] \n\t"
118  "2: \n\t"
119  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
120  "daddu %[tmp3], %[tmp3], %[tmp1] \n\t"
121  PTR_ADDIU "%[addr0], %[addr0], 0x01 \n\t"
122  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
123  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
124  "daddu %[tmp3], %[tmp3], %[tmp1] \n\t"
125  PTR_ADDIU "%[addr0], %[addr0], 0x01 \n\t"
126  "bnez %[tmp0], 2b \n\t"
127 
 /* dc = (sum + 16) >> 5, then multiplied by ff_pb_1 into tmp2 */
128  "daddiu %[tmp3], %[tmp3], 0x10 \n\t"
129  "dsra %[tmp3], 0x05 \n\t"
130  "dmul %[tmp2], %[tmp3], %[ff_pb_1] \n\t"
 /* pass 3: store tmp2 over 16 rows (8 iterations x 2 rows) */
131  PTR_ADDU "%[addr0], %[src], $0 \n\t"
132  "dli %[tmp0], 0x08 \n\t"
133  "3: \n\t"
134  "swl %[tmp2], 0x07(%[addr0]) \n\t"
135  "swr %[tmp2], 0x00(%[addr0]) \n\t"
136  "swl %[tmp2], 0x0f(%[addr0]) \n\t"
137  "swr %[tmp2], 0x08(%[addr0]) \n\t"
138  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
139  "swl %[tmp2], 0x07(%[addr0]) \n\t"
140  "swr %[tmp2], 0x00(%[addr0]) \n\t"
141  "swl %[tmp2], 0x0f(%[addr0]) \n\t"
142  "swr %[tmp2], 0x08(%[addr0]) \n\t"
143  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
144  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
145  "bnez %[tmp0], 3b \n\t"
146  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
147  [tmp2]"=&r"(tmp[2]), [tmp3]"=&r"(tmp[3]),
148  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
149  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride),
150  [ff_pb_1]"r"(ff_pb_1)
151  : "memory"
152  );
153 }
154 
/*
 * 8x8 luma DC prediction from the top edge only.
 *
 * First asm: loads the row above the block at offsets -1, 0 and +1, forms
 * (left + 2*centre + right + 2) >> 2 per sample, sums the eight filtered
 * bytes, computes (sum + 4) >> 3 and multiplies it by ff_pb_1 into dc.
 * Second asm: writes dc to all 8 rows of 8 bytes.
 *
 * has_topleft == 0: lane 0 of the "left" vector is replaced by lane 0 of
 * the centre vector. has_topright == 0: the top lane of the "right" vector
 * is replaced by its neighbour (pshufh selector 0xa4).
 */
155 void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft,
156  int has_topright, ptrdiff_t stride)
157 {
158  uint32_t dc;
159  double ftmp[11];
160  mips_reg tmp[3];
161  DECLARE_VAR_ALL64;
162  DECLARE_VAR_ADDRT;
163 
164  __asm__ volatile (
165  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
 /* ftmp10 / ftmp9 / ftmp8 = top row shifted by -1 / 0 / +1 bytes */
166  MMI_ULDC1(%[ftmp10], %[srcA], 0x00)
167  MMI_ULDC1(%[ftmp9], %[src0], 0x00)
168  MMI_ULDC1(%[ftmp8], %[src1], 0x00)
169 
 /* widen each 8-byte vector to two vectors of four 16-bit lanes */
170  "punpcklbh %[ftmp7], %[ftmp10], %[ftmp0] \n\t"
171  "punpckhbh %[ftmp6], %[ftmp10], %[ftmp0] \n\t"
172  "punpcklbh %[ftmp5], %[ftmp9], %[ftmp0] \n\t"
173  "punpckhbh %[ftmp4], %[ftmp9], %[ftmp0] \n\t"
174  "punpcklbh %[ftmp3], %[ftmp8], %[ftmp0] \n\t"
175  "punpckhbh %[ftmp2], %[ftmp8], %[ftmp0] \n\t"
176  "bnez %[has_topleft], 1f \n\t"
177  "pinsrh_0 %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
178 
179  "1: \n\t"
180  "bnez %[has_topright], 2f \n\t"
181  "dli %[tmp0], 0xa4 \n\t"
182  "mtc1 %[tmp0], %[ftmp1] \n\t"
183  "pshufh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
184 
185  "2: \n\t"
 /* ftmp1 = shift count 2; filter = (left + 2*centre + right + 2) >> 2 */
186  "dli %[tmp0], 0x02 \n\t"
187  "mtc1 %[tmp0], %[ftmp1] \n\t"
188  "pmullh %[ftmp5], %[ftmp5], %[ff_pw_2] \n\t"
189  "pmullh %[ftmp4], %[ftmp4], %[ff_pw_2] \n\t"
190  "paddh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
191  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
192  "paddh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
193  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
194  "paddh %[ftmp7], %[ftmp7], %[ff_pw_2] \n\t"
195  "paddh %[ftmp6], %[ftmp6], %[ff_pw_2] \n\t"
196  "psrah %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
197  "psrah %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
198  "packushb %[ftmp9], %[ftmp7], %[ftmp6] \n\t"
 /* sum the eight filtered bytes, then dc = ((sum + 4) >> 3) * ff_pb_1 */
199  "biadd %[ftmp10], %[ftmp9] \n\t"
200  "mfc1 %[tmp1], %[ftmp10] \n\t"
201  "addiu %[tmp1], %[tmp1], 0x04 \n\t"
202  "srl %[tmp1], %[tmp1], 0x03 \n\t"
203  "mul %[dc], %[tmp1], %[ff_pb_1] \n\t"
204  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
205  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
206  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
207  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
208  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
209  [ftmp10]"=&f"(ftmp[10]),
210  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
211  RESTRICT_ASM_ALL64
212  [dc]"=r"(dc)
213  : [srcA]"r"((mips_reg)(src-stride-1)),
214  [src0]"r"((mips_reg)(src-stride)),
215  [src1]"r"((mips_reg)(src-stride+1)),
216  [has_topleft]"r"(has_topleft), [has_topright]"r"(has_topright),
217  [ff_pb_1]"r"(ff_pb_1), [ff_pw_2]"f"(ff_pw_2)
218  : "memory"
219  );
220 
 /* Fill the block: 2 iterations x 4 rows, 8 bytes per row. */
221  __asm__ volatile (
222  "dli %[tmp0], 0x02 \n\t"
 /* duplicate the 32-bit dc pattern into both halves of ftmp0 */
223  "punpcklwd %[ftmp0], %[dc], %[dc] \n\t"
224 
225  "1: \n\t"
226  MMI_SDC1(%[ftmp0], %[src], 0x00)
227  MMI_SDXC1(%[ftmp0], %[src], %[stride], 0x00)
228  PTR_ADDU "%[src], %[src], %[stride] \n\t"
229  PTR_ADDU "%[src], %[src], %[stride] \n\t"
230  MMI_SDC1(%[ftmp0], %[src], 0x00)
231  MMI_SDXC1(%[ftmp0], %[src], %[stride], 0x00)
232 
233  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
234  PTR_ADDU "%[src], %[src], %[stride] \n\t"
235  PTR_ADDU "%[src], %[src], %[stride] \n\t"
236  "bnez %[tmp0], 1b \n\t"
237  : [ftmp0]"=&f"(ftmp[0]), [tmp0]"=&r"(tmp[0]),
238  RESTRICT_ASM_ALL64
239  RESTRICT_ASM_ADDRT
240  [src]"+&r"(src)
241  : [dc]"f"(dc), [stride]"r"((mips_reg)stride)
242  : "memory"
243  );
244 }
245 
/*
 * 8x8 luma DC prediction from both the left and the top edge.
 *
 * The left column is filtered in C (l0..l7) and summed into dc1. The first
 * asm filters the row above the block with (left + 2*centre + right + 2) >> 2
 * and sums the eight results into dc2. The fill value is
 * ((dc1 + dc2 + 8) >> 4) replicated into four bytes; the second asm writes it
 * to all 8 rows of 8 bytes.
 */
246 void ff_pred8x8l_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright,
247  ptrdiff_t stride)
248 {
249  uint32_t dc, dc1, dc2;
250  double ftmp[14];
251  mips_reg tmp[1];
252 
 /* Filtered left column. l0 falls back to the row-0 neighbour when the
  * top-left sample is unavailable; l7 reuses row 7 for the row below. */
253  const int l0 = ((has_topleft ? src[-1+-1*stride] : src[-1+0*stride]) + 2*src[-1+0*stride] + src[-1+1*stride] + 2) >> 2;
254  const int l1 = (src[-1+0*stride] + 2*src[-1+1*stride] + src[-1+2*stride] + 2) >> 2;
255  const int l2 = (src[-1+1*stride] + 2*src[-1+2*stride] + src[-1+3*stride] + 2) >> 2;
256  const int l3 = (src[-1+2*stride] + 2*src[-1+3*stride] + src[-1+4*stride] + 2) >> 2;
257  const int l4 = (src[-1+3*stride] + 2*src[-1+4*stride] + src[-1+5*stride] + 2) >> 2;
258  const int l5 = (src[-1+4*stride] + 2*src[-1+5*stride] + src[-1+6*stride] + 2) >> 2;
259  const int l6 = (src[-1+5*stride] + 2*src[-1+6*stride] + src[-1+7*stride] + 2) >> 2;
260  const int l7 = (src[-1+6*stride] + 2*src[-1+7*stride] + src[-1+7*stride] + 2) >> 2;
261 
262  DECLARE_VAR_ALL64;
263  DECLARE_VAR_ADDRT;
264 
265  __asm__ volatile (
 /* ftmp4 / ftmp5 / ftmp6 = top row shifted by -1 / 0 / +1 bytes */
266  MMI_ULDC1(%[ftmp4], %[srcA], 0x00)
267  MMI_ULDC1(%[ftmp5], %[src0], 0x00)
268  MMI_ULDC1(%[ftmp6], %[src1], 0x00)
269  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
270  "dli %[tmp0], 0x03 \n\t"
271  "punpcklbh %[ftmp7], %[ftmp4], %[ftmp0] \n\t"
272  "punpckhbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t"
273  "mtc1 %[tmp0], %[ftmp1] \n\t"
274  "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t"
275  "punpckhbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t"
276  "punpcklbh %[ftmp11], %[ftmp6], %[ftmp0] \n\t"
277  "punpckhbh %[ftmp12], %[ftmp6], %[ftmp0] \n\t"
 /* exchange the top lanes of ftmp8 and ftmp12 (both are only summed
  * later, so the exchange does not change the per-lane total) */
278  "pshufh %[ftmp3], %[ftmp8], %[ftmp1] \n\t"
279  "pshufh %[ftmp13], %[ftmp12], %[ftmp1] \n\t"
280  "pinsrh_3 %[ftmp8], %[ftmp8], %[ftmp13] \n\t"
281  "pinsrh_3 %[ftmp12], %[ftmp12], %[ftmp3] \n\t"
282  "bnez %[has_topleft], 1f \n\t"
283  "pinsrh_0 %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
284 
285  "1: \n\t"
286  "bnez %[has_topright], 2f \n\t"
 /* no top-right: use the top lane of the centre vector instead */
287  "pshufh %[ftmp13], %[ftmp10], %[ftmp1] \n\t"
288  "pinsrh_3 %[ftmp8], %[ftmp8], %[ftmp13] \n\t"
289 
290  "2: \n\t"
 /* ftmp1 = 2 (shift count), ftmp2 = lane 0 of ftmp1 in every lane */
291  "dli %[tmp0], 0x02 \n\t"
292  "mtc1 %[tmp0], %[ftmp1] \n\t"
293  "pshufh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
294  "pmullh %[ftmp9], %[ftmp9], %[ftmp2] \n\t"
295  "pmullh %[ftmp10], %[ftmp10], %[ftmp2] \n\t"
296  "paddh %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
297  "paddh %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
298  "paddh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
299  "paddh %[ftmp8], %[ftmp8], %[ftmp12] \n\t"
300  "paddh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
301  "paddh %[ftmp8], %[ftmp8], %[ftmp2] \n\t"
302  "psrah %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
303  "psrah %[ftmp8], %[ftmp8], %[ftmp1] \n\t"
304  "packushb %[ftmp5], %[ftmp7], %[ftmp8] \n\t"
 /* dc2 = sum of the eight filtered top bytes */
305  "biadd %[ftmp4], %[ftmp5] \n\t"
306  "mfc1 %[dc2], %[ftmp4] \n\t"
307  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
308  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
309  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
310  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
311  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
312  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
313  [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
314  [tmp0]"=&r"(tmp[0]),
315  RESTRICT_ASM_ALL64
316  [dc2]"=r"(dc2)
317  : [srcA]"r"((mips_reg)(src-stride-1)),
318  [src0]"r"((mips_reg)(src-stride)),
319  [src1]"r"((mips_reg)(src-stride+1)),
320  [has_topleft]"r"(has_topleft), [has_topright]"r"(has_topright)
321  : "memory"
322  );
323 
 /* Average the 16 filtered edge samples and replicate into 4 bytes. */
324  dc1 = l0+l1+l2+l3+l4+l5+l6+l7;
325  dc = ((dc1+dc2+8)>>4)*0x01010101U;
326 
 /* Fill the block: 2 iterations x 4 rows, 8 bytes per row. */
327  __asm__ volatile (
328  "dli %[tmp0], 0x02 \n\t"
329  "punpcklwd %[ftmp0], %[dc], %[dc] \n\t"
330 
331  "1: \n\t"
332  MMI_SDC1(%[ftmp0], %[src], 0x00)
333  MMI_SDXC1(%[ftmp0], %[src], %[stride], 0x00)
334  PTR_ADDU "%[src], %[src], %[stride] \n\t"
335  PTR_ADDU "%[src], %[src], %[stride] \n\t"
336  MMI_SDC1(%[ftmp0], %[src], 0x00)
337  MMI_SDXC1(%[ftmp0], %[src], %[stride], 0x00)
338 
339  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
340  PTR_ADDU "%[src], %[src], %[stride] \n\t"
341  PTR_ADDU "%[src], %[src], %[stride] \n\t"
342  "bnez %[tmp0], 1b \n\t"
343  : [ftmp0]"=&f"(ftmp[0]), [tmp0]"=&r"(tmp[0]),
344  RESTRICT_ASM_ALL64
345  RESTRICT_ASM_ADDRT
346  [src]"+&r"(src)
347  : [dc]"f"(dc), [stride]"r"((mips_reg)stride)
348  : "memory"
349  );
350 }
351 
352 void ff_pred8x8l_vertical_8_mmi(uint8_t *src, int has_topleft,
353  int has_topright, ptrdiff_t stride)
354 {
355  double ftmp[12];
356  mips_reg tmp[1];
357  DECLARE_VAR_ALL64;
358 
359  __asm__ volatile (
360  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
361  MMI_LDC1(%[ftmp3], %[srcA], 0x00)
362  MMI_LDC1(%[ftmp4], %[src0], 0x00)
363  MMI_LDC1(%[ftmp5], %[src1], 0x00)
364  "punpcklbh %[ftmp6], %[ftmp3], %[ftmp0] \n\t"
365  "punpckhbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t"
366  "punpcklbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t"
367  "punpckhbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
368  "punpcklbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t"
369  "punpckhbh %[ftmp11], %[ftmp5], %[ftmp0] \n\t"
370  "bnez %[has_topleft], 1f \n\t"
371  "pinsrh_0 %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
372 
373  "1: \n\t"
374  "bnez %[has_topright], 2f \n\t"
375  "dli %[tmp0], 0xa4 \n\t"
376  "mtc1 %[tmp0], %[ftmp1] \n\t"
377  "pshufh %[ftmp11], %[ftmp11], %[ftmp1] \n\t"
378 
379  "2: \n\t"
380  "dli %[tmp0], 0x02 \n\t"
381  "mtc1 %[tmp0], %[ftmp1] \n\t"
382  "pshufh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
383  "pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t"
384  "pmullh %[ftmp9], %[ftmp9], %[ftmp2] \n\t"
385  "paddh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
386  "paddh %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
387  "paddh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
388  "paddh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
389  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
390  "paddh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
391  "psrah %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
392  "psrah %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
393  "packushb %[ftmp4], %[ftmp6], %[ftmp7] \n\t"
394  MMI_SDC1(%[ftmp4], %[src], 0x00)
395  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
396  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
397  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
398  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
399  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
400  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
401  [tmp0]"=&r"(tmp[0]),
402  RESTRICT_ASM_ALL64
403  [src]"=r"(src)
404  : [srcA]"r"((mips_reg)(src-stride-1)),
405  [src0]"r"((mips_reg)(src-stride)),
406  [src1]"r"((mips_reg)(src-stride+1)),
407  [has_topleft]"r"(has_topleft), [has_topright]"r"(has_topright)
408  : "memory"
409  );
410 
411  __asm__ volatile (
412  "dli %[tmp0], 0x02 \n\t"
413 
414  "1: \n\t"
415  MMI_SDC1(%[ftmp0], %[src], 0x00)
416  PTR_ADDU "%[src], %[src], %[stride] \n\t"
417  MMI_SDC1(%[ftmp0], %[src], 0x00)
418  PTR_ADDU "%[src], %[src], %[stride] \n\t"
419  MMI_SDC1(%[ftmp0], %[src], 0x00)
420  PTR_ADDU "%[src], %[src], %[stride] \n\t"
421  MMI_SDC1(%[ftmp0], %[src], 0x00)
422 
423  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
424  PTR_ADDU "%[src], %[src], %[stride] \n\t"
425  "bnez %[tmp0], 1b \n\t"
426  : [ftmp0]"=&f"(ftmp[0]), [tmp0]"=&r"(tmp[0]),
427  RESTRICT_ASM_ALL64
428  [src]"+&r"(src)
429  : [stride]"r"((mips_reg)stride)
430  : "memory"
431  );
432 }
433 
/*
 * 4x4 DC prediction.
 *
 * dc is computed in C as (4 samples above + 4 samples to the left + 4) >> 3.
 * The asm multiplies it by ff_pb_1 and stores one word at
 * src + k * stride for k = 0..3 via MMI_SWX.
 * The topright parameter is not used in this function.
 */
434 void ff_pred4x4_dc_8_mmi(uint8_t *src, const uint8_t *topright,
435  ptrdiff_t stride)
436 {
437  const int dc = (src[-stride] + src[1-stride] + src[2-stride]
438  + src[3-stride] + src[-1+0*stride] + src[-1+1*stride]
439  + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
440  uint64_t tmp[2];
441  mips_reg addr[1];
442  DECLARE_VAR_ADDRT;
443 
444  __asm__ volatile (
 /* tmp1 = dc * ff_pb_1; addr0 = running row offset, starting at 0 */
445  PTR_ADDU "%[tmp0], %[dc], $0 \n\t"
446  "dmul %[tmp1], %[tmp0], %[ff_pb_1] \n\t"
447  "xor %[addr0], %[addr0], %[addr0] \n\t"
448  MMI_SWX(%[tmp1], %[src], %[addr0], 0x00)
449  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
450  MMI_SWX(%[tmp1], %[src], %[addr0], 0x00)
451  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
452  MMI_SWX(%[tmp1], %[src], %[addr0], 0x00)
453  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
454  MMI_SWX(%[tmp1], %[src], %[addr0], 0x00)
455  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
456  RESTRICT_ASM_ADDRT
457  [addr0]"=&r"(addr[0])
458  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride),
459  [dc]"r"(dc), [ff_pb_1]"r"(ff_pb_1)
460  : "memory"
461  );
462 }
463 
/*
 * Function body whose signature line (listing line 464) is missing from this
 * extract. NOTE(review): presumably ff_pred8x8_vertical_8_mmi(uint8_t *src,
 * ptrdiff_t stride) -- confirm against the real file.
 *
 * Loads 8 bytes from src - stride with an ldl/ldr pair and stores them with
 * sdl/sdr pairs to 8 consecutive rows starting at src.
 */
465 {
466  uint64_t tmp[2];
467  mips_reg addr[2];
468 
469  __asm__ volatile (
 /* addr0 = row above the block, addr1 = output row pointer */
470  PTR_SUBU "%[addr0], %[src], %[stride] \n\t"
471  PTR_ADDU "%[addr1], %[src], $0 \n\t"
472  "ldl %[tmp0], 0x07(%[addr0]) \n\t"
473  "ldr %[tmp0], 0x00(%[addr0]) \n\t"
 /* tmp1 = loop counter: 4 iterations, two rows per iteration */
474  "dli %[tmp1], 0x04 \n\t"
475  "1: \n\t"
476  "sdl %[tmp0], 0x07(%[addr1]) \n\t"
477  "sdr %[tmp0], 0x00(%[addr1]) \n\t"
478  PTR_ADDU "%[addr1], %[stride] \n\t"
479  "sdl %[tmp0], 0x07(%[addr1]) \n\t"
480  "sdr %[tmp0], 0x00(%[addr1]) \n\t"
481  "daddi %[tmp1], -0x01 \n\t"
482  PTR_ADDU "%[addr1], %[stride] \n\t"
483  "bnez %[tmp1], 1b \n\t"
484  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
485  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
486  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride)
487  : "memory"
488  );
489 }
490 
/*
 * Body of ff_pred8x8_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
 * (name and signature taken from the cross-reference list at the end of this
 * listing; the signature line itself is missing here).
 *
 * For each of 8 rows: load the byte immediately left of the row (src - 1),
 * multiply it by ff_pb_1 and store the product at the start of the row with
 * an swl/swr pair at offsets 0x07 / 0x00.
 */
492 {
493  uint64_t tmp[3];
494  mips_reg addr[2];
495 
496  __asm__ volatile (
 /* addr0 walks the left-neighbour column, addr1 walks the output rows */
497  PTR_ADDI "%[addr0], %[src], -0x01 \n\t"
498  PTR_ADDU "%[addr1], %[src], $0 \n\t"
 /* tmp0 = loop counter: 4 iterations, two rows per iteration */
499  "dli %[tmp0], 0x04 \n\t"
500  "1: \n\t"
501  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
502  "dmul %[tmp2], %[tmp1], %[ff_pb_1] \n\t"
503  "swl %[tmp2], 0x07(%[addr1]) \n\t"
504  "swr %[tmp2], 0x00(%[addr1]) \n\t"
505  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
506  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
507  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
508  "dmul %[tmp2], %[tmp1], %[ff_pb_1] \n\t"
509  "swl %[tmp2], 0x07(%[addr1]) \n\t"
510  "swr %[tmp2], 0x00(%[addr1]) \n\t"
511  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
512  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
513  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
514  "bnez %[tmp0], 1b \n\t"
515  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
516  [tmp2]"=&r"(tmp[2]),
517  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
518  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride),
519  [ff_pb_1]"r"(ff_pb_1)
520  : "memory"
521  );
522 }
523 
/*
 * Function body whose signature line (listing line 524) is missing from this
 * extract. NOTE(review): presumably ff_pred8x8_top_dc_8_mmi(uint8_t *src,
 * ptrdiff_t stride) -- confirm against the real file.
 *
 * Loads the 8 bytes above the block, sums the low four and the high four
 * separately, computes (sum + 2) >> 2 for each half, packs the two results
 * into one 8-byte row and stores that row to 8 consecutive rows.
 */
525 {
526  double ftmp[4];
527  uint64_t tmp[1];
528  mips_reg addr[1];
529  DECLARE_VAR_ALL64;
530 
531  __asm__ volatile (
 /* tmp0 = 2: used both as rounding term and as shift count */
532  "dli %[tmp0], 0x02 \n\t"
533  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
534  PTR_SUBU "%[addr0], %[src], %[stride] \n\t"
535  MMI_LDC1(%[ftmp1], %[addr0], 0x00)
 /* ftmp2 / ftmp3 = sum of the low / high four top bytes */
536  "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
537  "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
538  "biadd %[ftmp2], %[ftmp2] \n\t"
539  "biadd %[ftmp3], %[ftmp3] \n\t"
540  "mtc1 %[tmp0], %[ftmp1] \n\t"
 /* replicate lane 0 of each sum and of the constant 2 across all lanes */
541  "pshufh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
542  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
543  "pshufh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
544  "paddush %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
545  "paddush %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
546  "mtc1 %[tmp0], %[ftmp1] \n\t"
547  "psrlh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
548  "psrlh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
549  "packushb %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
 /* store the packed row to rows 0..7 */
550  MMI_SDC1(%[ftmp1], %[src], 0x00)
551  PTR_ADDU "%[src], %[src], %[stride] \n\t"
552  MMI_SDC1(%[ftmp1], %[src], 0x00)
553  PTR_ADDU "%[src], %[src], %[stride] \n\t"
554  MMI_SDC1(%[ftmp1], %[src], 0x00)
555  PTR_ADDU "%[src], %[src], %[stride] \n\t"
556  MMI_SDC1(%[ftmp1], %[src], 0x00)
557  PTR_ADDU "%[src], %[src], %[stride] \n\t"
558  MMI_SDC1(%[ftmp1], %[src], 0x00)
559  PTR_ADDU "%[src], %[src], %[stride] \n\t"
560  MMI_SDC1(%[ftmp1], %[src], 0x00)
561  PTR_ADDU "%[src], %[src], %[stride] \n\t"
562  MMI_SDC1(%[ftmp1], %[src], 0x00)
563  PTR_ADDU "%[src], %[src], %[stride] \n\t"
564  MMI_SDC1(%[ftmp1], %[src], 0x00)
565  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
566  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
567  [tmp0]"=&r"(tmp[0]),
568  RESTRICT_ASM_ALL64
569  [addr0]"=&r"(addr[0]),
570  [src]"+&r"(src)
571  : [stride]"r"((mips_reg)stride)
572  : "memory"
573  );
574 }
575 
/*
 * Function body whose signature line (listing line 576) is missing from this
 * extract. NOTE(review): presumably ff_pred8x8_dc_8_mmi(uint8_t *src,
 * ptrdiff_t stride) -- confirm against the real file.
 *
 * Computes four partial sums of the block's neighbours:
 *   addr3 = top bytes 0..3, addr4 = top bytes 4..7,
 *   addr5 = left bytes of rows 0..3, addr6 = left bytes of rows 4..7.
 * From these it derives four DC values:
 *   (addr3 + addr5 + 4) >> 3, (addr4 + 2) >> 2,
 *   (addr6 + 2) >> 2,         (addr4 + addr6 + 4) >> 3.
 * Rows 0..3 are filled with the first pair (four bytes each), rows 4..7 with
 * the second pair. The addrN operands double as plain integer scratch
 * registers here.
 */
577 {
578  double ftmp[5];
579  mips_reg addr[7];
580 
581  __asm__ volatile (
 /* addr0 = src - stride (top bytes 0..3), addr1 = addr0 + 4 (4..7) */
582  "negu %[addr0], %[stride] \n\t"
583  PTR_ADDU "%[addr0], %[addr0], %[src] \n\t"
584  PTR_ADDIU "%[addr1], %[addr0], 0x04 \n\t"
585  "lbu %[addr2], 0x00(%[addr0]) \n\t"
586  PTR_ADDU "%[addr3], $0, %[addr2] \n\t"
587  PTR_ADDIU "%[addr0], 0x01 \n\t"
588  "lbu %[addr2], 0x00(%[addr1]) \n\t"
589  PTR_ADDU "%[addr4], $0, %[addr2] \n\t"
590  PTR_ADDIU "%[addr1], 0x01 \n\t"
591  "lbu %[addr2], 0x00(%[addr0]) \n\t"
592  PTR_ADDU "%[addr3], %[addr3], %[addr2] \n\t"
593  PTR_ADDIU "%[addr0], 0x01 \n\t"
594  "lbu %[addr2], 0x00(%[addr1]) \n\t"
595  PTR_ADDU "%[addr4], %[addr4], %[addr2] \n\t"
596  PTR_ADDIU "%[addr1], 0x01 \n\t"
597  "lbu %[addr2], 0x00(%[addr0]) \n\t"
598  PTR_ADDU "%[addr3], %[addr3], %[addr2] \n\t"
599  PTR_ADDIU "%[addr0], 0x01 \n\t"
600  "lbu %[addr2], 0x00(%[addr1]) \n\t"
601  PTR_ADDU "%[addr4], %[addr4], %[addr2] \n\t"
602  PTR_ADDIU "%[addr1], 0x01 \n\t"
603  "lbu %[addr2], 0x00(%[addr0]) \n\t"
604  PTR_ADDU "%[addr3], %[addr3], %[addr2] \n\t"
605  PTR_ADDIU "%[addr0], 0x01 \n\t"
606  "lbu %[addr2], 0x00(%[addr1]) \n\t"
607  PTR_ADDU "%[addr4], %[addr4], %[addr2] \n\t"
608  PTR_ADDIU "%[addr1], 0x01 \n\t"
 /* addr2 = src - 1: walk the left column; rows 0..3 -> addr5 */
609  "dli %[addr2], -0x01 \n\t"
610  PTR_ADDU "%[addr2], %[addr2], %[src] \n\t"
611  "lbu %[addr1], 0x00(%[addr2]) \n\t"
612  PTR_ADDU "%[addr5], $0, %[addr1] \n\t"
613  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
614  "lbu %[addr1], 0x00(%[addr2]) \n\t"
615  PTR_ADDU "%[addr5], %[addr5], %[addr1] \n\t"
616  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
617  "lbu %[addr1], 0x00(%[addr2]) \n\t"
618  PTR_ADDU "%[addr5], %[addr5], %[addr1] \n\t"
619  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
620  "lbu %[addr1], 0x00(%[addr2]) \n\t"
621  PTR_ADDU "%[addr5], %[addr5], %[addr1] \n\t"
622  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
 /* rows 4..7 of the left column -> addr6 */
623  "lbu %[addr1], 0x00(%[addr2]) \n\t"
624  PTR_ADDU "%[addr6], $0, %[addr1] \n\t"
625  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
626  "lbu %[addr1], 0x00(%[addr2]) \n\t"
627  PTR_ADDU "%[addr6], %[addr6], %[addr1] \n\t"
628  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
629  "lbu %[addr1], 0x00(%[addr2]) \n\t"
630  PTR_ADDU "%[addr6], %[addr6], %[addr1] \n\t"
631  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
632  "lbu %[addr1], 0x00(%[addr2]) \n\t"
633  PTR_ADDU "%[addr6], %[addr6], %[addr1] \n\t"
 /* rounding and shifts: the four DC values end up in addr3, addr4,
  * addr1 and addr2 (addr2 is formed before addr4/addr1 are shifted) */
634  PTR_ADDU "%[addr3], %[addr3], %[addr5] \n\t"
635  PTR_ADDIU "%[addr3], %[addr3], 0x04 \n\t"
636  PTR_ADDIU "%[addr4], %[addr4], 0x02 \n\t"
637  PTR_ADDIU "%[addr1], %[addr6], 0x02 \n\t"
638  PTR_ADDU "%[addr2], %[addr4], %[addr1] \n\t"
639  PTR_SRL "%[addr3], 0x03 \n\t"
640  PTR_SRL "%[addr4], 0x02 \n\t"
641  PTR_SRL "%[addr1], 0x02 \n\t"
642  PTR_SRL "%[addr2], 0x03 \n\t"
 /* replicate each DC value across four lanes, then pack pairs to bytes */
643  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
644  "dmtc1 %[addr3], %[ftmp1] \n\t"
645  "pshufh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
646  "dmtc1 %[addr4], %[ftmp2] \n\t"
647  "pshufh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
648  "dmtc1 %[addr1], %[ftmp3] \n\t"
649  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
650  "dmtc1 %[addr2], %[ftmp4] \n\t"
651  "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
652  "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
653  "packushb %[ftmp2], %[ftmp3], %[ftmp4] \n\t"
 /* rows 0..3 <- ftmp1, rows 4..7 <- ftmp2 */
654  PTR_ADDU "%[addr0], $0, %[src] \n\t"
655  MMI_SDC1(%[ftmp1], %[addr0], 0x00)
656  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
657  MMI_SDC1(%[ftmp1], %[addr0], 0x00)
658  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
659  MMI_SDC1(%[ftmp1], %[addr0], 0x00)
660  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
661  MMI_SDC1(%[ftmp1], %[addr0], 0x00)
662  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
663  MMI_SDC1(%[ftmp2], %[addr0], 0x00)
664  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
665  MMI_SDC1(%[ftmp2], %[addr0], 0x00)
666  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
667  MMI_SDC1(%[ftmp2], %[addr0], 0x00)
668  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
669  MMI_SDC1(%[ftmp2], %[addr0], 0x00)
670  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
671  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
672  [ftmp4]"=&f"(ftmp[4]),
673  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
674  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
675  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
676  [addr6]"=&r"(addr[6])
677  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride)
678  : "memory"
679  );
680 }
681 
/*
 * Body of ff_pred8x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
 * (name and signature taken from the cross-reference list at the end of this
 * listing; the signature line itself is missing here).
 *
 * Loads 8 bytes from src - stride (the row above the block) and stores them
 * to 16 consecutive rows starting at src.
 */
683 {
684  double ftmp[1];
685  uint64_t tmp[1];
686  DECLARE_VAR_ALL64;
687 
688  __asm__ volatile (
689  MMI_LDC1(%[ftmp0], %[srcA], 0x00)
 /* tmp0 = loop counter: 4 iterations, four rows per iteration */
690  "dli %[tmp0], 0x04 \n\t"
691 
692  "1: \n\t"
693  MMI_SDC1(%[ftmp0], %[src], 0x00)
694  PTR_ADDU "%[src], %[src], %[stride] \n\t"
695  MMI_SDC1(%[ftmp0], %[src], 0x00)
696  PTR_ADDU "%[src], %[src], %[stride] \n\t"
697  MMI_SDC1(%[ftmp0], %[src], 0x00)
698  PTR_ADDU "%[src], %[src], %[stride] \n\t"
699  MMI_SDC1(%[ftmp0], %[src], 0x00)
700 
701  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
702  PTR_ADDU "%[src], %[src], %[stride] \n\t"
703  "bnez %[tmp0], 1b \n\t"
704  : [ftmp0]"=&f"(ftmp[0]),
705  [tmp0]"=&r"(tmp[0]),
706  RESTRICT_ASM_ALL64
707  [src]"+&r"(src)
708  : [stride]"r"((mips_reg)stride), [srcA]"r"((mips_reg)(src-stride))
709  : "memory"
710  );
711 }
712 
/*
 * Body of ff_pred8x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
 * (name and signature taken from the cross-reference list at the end of this
 * listing; the signature line itself is missing here).
 *
 * Same per-row operation as the 8x8 horizontal predictor in this file, run
 * over 16 rows: load the byte at src - 1, multiply by ff_pb_1, store with an
 * swl/swr pair.
 */
714 {
715  uint64_t tmp[3];
716  mips_reg addr[2];
717 
718  __asm__ volatile (
 /* addr0 walks the left-neighbour column, addr1 walks the output rows */
719  PTR_ADDI "%[addr0], %[src], -0x01 \n\t"
720  PTR_ADDU "%[addr1], %[src], $0 \n\t"
 /* tmp0 = loop counter: 8 iterations, two rows per iteration */
721  "dli %[tmp0], 0x08 \n\t"
722  "1: \n\t"
723  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
724  "dmul %[tmp2], %[tmp1], %[ff_pb_1] \n\t"
725  "swl %[tmp2], 0x07(%[addr1]) \n\t"
726  "swr %[tmp2], 0x00(%[addr1]) \n\t"
727  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
728  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
729  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
730  "dmul %[tmp2], %[tmp1], %[ff_pb_1] \n\t"
731  "swl %[tmp2], 0x07(%[addr1]) \n\t"
732  "swr %[tmp2], 0x00(%[addr1]) \n\t"
733  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
734  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
735  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
736  "bnez %[tmp0], 1b \n\t"
737  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
738  [tmp2]"=&r"(tmp[2]),
739  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
740  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride),
741  [ff_pb_1]"r"(ff_pb_1)
742  : "memory"
743  );
744 }
745 
/*
 * Shared implementation of the 16x16 plane predictors.
 *
 * Steps, in template order:
 *   1. Weighted sum over the row above the block (weights ff_pw_m8tom5,
 *      ff_pw_m4tom1, ff_pw_1to4, ff_pw_5to8), reduced to one 16-bit value
 *      kept in lane 0 of ftmp5.
 *   2. The same weighted sum over the column left of the block (rows -1..6
 *      and 8..15; row 7 is skipped), reduced into lane 0 of ftmp6.
 *      tmp5 = (src[15 - stride] + src[-1 + 15*stride] + 1) << 4.
 *   3. Both sums are sign-extended from 16 bits into tmp0 / tmp1 and scaled
 *      according to svq3 / rv40 (see the three branches below).
 *   4. tmp5 -= 7 * (tmp0 + tmp1); then for each of 16 rows the 16 output
 *      bytes are clip((tmp5 + x * tmp0) >> 5) for x = 0..15, and tmp1 is
 *      added to the running base after every row.
 *
 * svq3 and rv40 are passed as constants 0/1 by the three wrappers below this
 * function.
 *
 * NOTE(review): stride is declared int here while the wrappers listed in the
 * cross-reference take ptrdiff_t.
 * NOTE(review): ftmp has 11 elements but only ftmp[0..9] are bound.
 */
746 static inline void pred16x16_plane_compat_mmi(uint8_t *src, int stride,
747  const int svq3, const int rv40)
748 {
749  double ftmp[11];
750  uint64_t tmp[6];
751  mips_reg addr[1];
752  DECLARE_VAR_ALL64;
753 
754  __asm__ volatile(
 /* --- step 1: weighted sum over the top row -------------------------- */
755  PTR_SUBU "%[addr0], %[src], %[stride] \n\t"
756  "dli %[tmp0], 0x20 \n\t"
757  "dmtc1 %[tmp0], %[ftmp4] \n\t"
 /* ftmp0 = top[-1..6], ftmp2 = top[8..15]; ftmp1/ftmp3 = upper halves */
758  MMI_ULDC1(%[ftmp0], %[addr0], -0x01)
759  MMI_ULDC1(%[ftmp2], %[addr0], 0x08)
760  "dsrl %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
761  "dsrl %[ftmp3], %[ftmp2], %[ftmp4] \n\t"
762  "xor %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
763  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
764  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
765  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
766  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
767  "pmullh %[ftmp0], %[ftmp0], %[ff_pw_m8tom5] \n\t"
768  "pmullh %[ftmp1], %[ftmp1], %[ff_pw_m4tom1] \n\t"
769  "pmullh %[ftmp2], %[ftmp2], %[ff_pw_1to4] \n\t"
770  "pmullh %[ftmp3], %[ftmp3], %[ff_pw_5to8] \n\t"
771  "paddsh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
772  "paddsh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
773  "paddsh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
 /* horizontal reduction of the four lanes (selectors 0x0e, then 0x01) */
774  "dli %[tmp0], 0x0e \n\t"
775  "dmtc1 %[tmp0], %[ftmp4] \n\t"
776  "pshufh %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
777  "paddsh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
778  "dli %[tmp0], 0x01 \n\t"
779  "dmtc1 %[tmp0], %[ftmp4] \n\t"
780  "pshufh %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
781  "paddsh %[ftmp5], %[ftmp0], %[ftmp1] \n\t"
782 
 /* --- step 2: gather the left column, four bytes per vector ---------- */
 /* addr0 = src - 1 - stride; tmp5 = src[15 - stride] */
783  PTR_ADDIU "%[addr0], %[src], -0x01 \n\t"
784  PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
785  "lbu %[tmp2], 0x00(%[addr0]) \n\t"
786  "lbu %[tmp5], 0x10(%[addr0]) \n\t"
787  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
788  "lbu %[tmp3], 0x00(%[addr0]) \n\t"
789  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
790  "lbu %[tmp4], 0x00(%[addr0]) \n\t"
791  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
792  "lbu %[tmp0], 0x00(%[addr0]) \n\t"
793  "dsll %[tmp3], %[tmp3], 0x10 \n\t"
794  "dsll %[tmp4], %[tmp4], 0x20 \n\t"
795  "dsll %[tmp0], %[tmp0], 0x30 \n\t"
796  "or %[tmp4], %[tmp4], %[tmp0] \n\t"
797  "or %[tmp2], %[tmp2], %[tmp3] \n\t"
798  "or %[tmp2], %[tmp2], %[tmp4] \n\t"
 /* ftmp0 = left bytes of rows -1..2 as 16-bit lanes */
799  "dmtc1 %[tmp2], %[ftmp0] \n\t"
800 
801  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
802  "lbu %[tmp2], 0x00(%[addr0]) \n\t"
803  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
804  "lbu %[tmp3], 0x00(%[addr0]) \n\t"
805  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
806  "lbu %[tmp4], 0x00(%[addr0]) \n\t"
807  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
808  "lbu %[tmp0], 0x00(%[addr0]) \n\t"
809  "dsll %[tmp3], %[tmp3], 0x10 \n\t"
810  "dsll %[tmp4], %[tmp4], 0x20 \n\t"
811  "dsll %[tmp0], %[tmp0], 0x30 \n\t"
812  "or %[tmp4], %[tmp4], %[tmp0] \n\t"
813  "or %[tmp2], %[tmp2], %[tmp3] \n\t"
814  "or %[tmp2], %[tmp2], %[tmp4] \n\t"
 /* ftmp1 = left bytes of rows 3..6 */
815  "dmtc1 %[tmp2], %[ftmp1] \n\t"
816 
 /* two stride steps here: row 7 is skipped */
817  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
818  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
819  "lbu %[tmp2], 0x00(%[addr0]) \n\t"
820  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
821  "lbu %[tmp3], 0x00(%[addr0]) \n\t"
822  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
823  "lbu %[tmp4], 0x00(%[addr0]) \n\t"
824  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
825  "lbu %[tmp0], 0x00(%[addr0]) \n\t"
826  "dsll %[tmp3], %[tmp3], 0x10 \n\t"
827  "dsll %[tmp4], %[tmp4], 0x20 \n\t"
828  "dsll %[tmp0], %[tmp0], 0x30 \n\t"
829  "or %[tmp4], %[tmp4], %[tmp0] \n\t"
830  "or %[tmp2], %[tmp2], %[tmp3] \n\t"
831  "or %[tmp2], %[tmp2], %[tmp4] \n\t"
 /* ftmp2 = left bytes of rows 8..11 */
832  "dmtc1 %[tmp2], %[ftmp2] \n\t"
833 
834  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
835  "lbu %[tmp2], 0x00(%[addr0]) \n\t"
836  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
837  "lbu %[tmp3], 0x00(%[addr0]) \n\t"
838  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
839  "lbu %[tmp4], 0x00(%[addr0]) \n\t"
840  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
841  "lbu %[tmp0], 0x00(%[addr0]) \n\t"
 /* tmp5 = (top[15] + left[15] + 1) << 4 */
842  "daddu %[tmp5], %[tmp5], %[tmp0] \n\t"
843  "daddiu %[tmp5], %[tmp5], 0x01 \n\t"
844  "dsll %[tmp5], %[tmp5], 0x04 \n\t"
845 
846  "dsll %[tmp3], %[tmp3], 0x10 \n\t"
847  "dsll %[tmp4], %[tmp4], 0x20 \n\t"
848  "dsll %[tmp0], %[tmp0], 0x30 \n\t"
849  "or %[tmp4], %[tmp4], %[tmp0] \n\t"
850  "or %[tmp2], %[tmp2], %[tmp3] \n\t"
851  "or %[tmp2], %[tmp2], %[tmp4] \n\t"
 /* ftmp3 = left bytes of rows 12..15 */
852  "dmtc1 %[tmp2], %[ftmp3] \n\t"
853 
 /* weighted sum over the left column, reduced into ftmp6 */
854  "pmullh %[ftmp0], %[ftmp0], %[ff_pw_m8tom5] \n\t"
855  "pmullh %[ftmp1], %[ftmp1], %[ff_pw_m4tom1] \n\t"
856  "pmullh %[ftmp2], %[ftmp2], %[ff_pw_1to4] \n\t"
857  "pmullh %[ftmp3], %[ftmp3], %[ff_pw_5to8] \n\t"
858  "paddsh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
859  "paddsh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
860  "paddsh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
861  "dli %[tmp0], 0x0e \n\t"
862  "dmtc1 %[tmp0], %[ftmp4] \n\t"
863  "pshufh %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
864  "paddsh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
865 
866  "dli %[tmp0], 0x01 \n\t"
867  "dmtc1 %[tmp0], %[ftmp4] \n\t"
868  "pshufh %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
869  "paddsh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
870 
 /* --- step 3: sign-extend the low 16 bits of each sum --------------- */
 /* tmp0 = top-row sum, tmp1 = left-column sum */
871  "dmfc1 %[tmp0], %[ftmp5] \n\t"
872  "dsll %[tmp0], %[tmp0], 0x30 \n\t"
873  "dsra %[tmp0], %[tmp0], 0x30 \n\t"
874  "dmfc1 %[tmp1], %[ftmp6] \n\t"
875  "dsll %[tmp1], %[tmp1], 0x30 \n\t"
876  "dsra %[tmp1], %[tmp1], 0x30 \n\t"
877 
 /* svq3 != 0: x = (5 * (x / 4)) / 16 for both, then swap tmp0 and tmp1 */
878  "beqz %[svq3], 1f \n\t"
879  "dli %[tmp2], 0x04 \n\t"
880  "ddiv %[tmp0], %[tmp0], %[tmp2] \n\t"
881  "ddiv %[tmp1], %[tmp1], %[tmp2] \n\t"
882  "dli %[tmp2], 0x05 \n\t"
883  "dmul %[tmp0], %[tmp0], %[tmp2] \n\t"
884  "dmul %[tmp1], %[tmp1], %[tmp2] \n\t"
885  "dli %[tmp2], 0x10 \n\t"
886  "ddiv %[tmp0], %[tmp0], %[tmp2] \n\t"
887  "ddiv %[tmp1], %[tmp1], %[tmp2] \n\t"
888  "daddu %[tmp2], %[tmp0], $0 \n\t"
889  "daddu %[tmp0], %[tmp1], $0 \n\t"
890  "daddu %[tmp1], %[tmp2], $0 \n\t"
891  "b 2f \n\t"
892 
893  "1: \n\t"
 /* rv40 != 0: x = (x + (x >> 2)) >> 4 */
894  "beqz %[rv40], 1f \n\t"
895  "dsra %[tmp2], %[tmp0], 0x02 \n\t"
896  "daddu %[tmp0], %[tmp0], %[tmp2] \n\t"
897  "dsra %[tmp2], %[tmp1], 0x02 \n\t"
898  "daddu %[tmp1], %[tmp1], %[tmp2] \n\t"
899  "dsra %[tmp0], %[tmp0], 0x04 \n\t"
900  "dsra %[tmp1], %[tmp1], 0x04 \n\t"
901  "b 2f \n\t"
902 
903  "1: \n\t"
 /* neither flag set: x = (5 * x + 32) >> 6 */
904  "dli %[tmp2], 0x05 \n\t"
905  "dmul %[tmp0], %[tmp0], %[tmp2] \n\t"
906  "dmul %[tmp1], %[tmp1], %[tmp2] \n\t"
907  "daddiu %[tmp0], %[tmp0], 0x20 \n\t"
908  "daddiu %[tmp1], %[tmp1], 0x20 \n\t"
909  "dsra %[tmp0], %[tmp0], 0x06 \n\t"
910  "dsra %[tmp1], %[tmp1], 0x06 \n\t"
911 
912  "2: \n\t"
 /* --- step 4: base value and per-column increments ------------------- */
 /* tmp5 -= 7 * (tmp0 + tmp1) */
913  "daddu %[tmp3], %[tmp0], %[tmp1] \n\t"
914  "dli %[tmp2], 0x07 \n\t"
915  "dmul %[tmp3], %[tmp3], %[tmp2] \n\t"
916  "dsubu %[tmp5], %[tmp5], %[tmp3] \n\t"
917 
 /* ftmp0 / ftmp5 / ftmp6 = tmp0 / tmp1 / tmp5 replicated in every lane;
  * ftmp7 = shift count 5; ftmp1..ftmp4 = {0..3,4..7,8..11,12..15}*tmp0 */
918  "xor %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
919  "dmtc1 %[tmp0], %[ftmp0] \n\t"
920  "pshufh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
921  "dmtc1 %[tmp1], %[ftmp5] \n\t"
922  "pshufh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
923  "dmtc1 %[tmp5], %[ftmp6] \n\t"
924  "pshufh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
925  "dli %[tmp0], 0x05 \n\t"
926  "dmtc1 %[tmp0], %[ftmp7] \n\t"
927  "pmullh %[ftmp1], %[ff_pw_0to3], %[ftmp0] \n\t"
928  "dmtc1 %[ff_pw_4to7], %[ftmp2] \n\t"
929  "pmullh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
930  "dmtc1 %[ff_pw_8tob], %[ftmp3] \n\t"
931  "pmullh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
932  "dmtc1 %[ff_pw_ctof], %[ftmp4] \n\t"
933  "pmullh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
934 
 /* row loop: 16 rows, two 8-byte stores per row */
935  "dli %[tmp0], 0x10 \n\t"
936  PTR_ADDU "%[addr0], %[src], $0 \n\t"
937  "1: \n\t"
938  "paddsh %[ftmp8], %[ftmp1], %[ftmp6] \n\t"
939  "psrah %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
940  "paddsh %[ftmp9], %[ftmp2], %[ftmp6] \n\t"
941  "psrah %[ftmp9], %[ftmp9], %[ftmp7] \n\t"
942  "packushb %[ftmp0], %[ftmp8], %[ftmp9] \n\t"
943  MMI_SDC1(%[ftmp0], %[addr0], 0x00)
944 
945  "paddsh %[ftmp8], %[ftmp3], %[ftmp6] \n\t"
946  "psrah %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
947  "paddsh %[ftmp9], %[ftmp4], %[ftmp6] \n\t"
948  "psrah %[ftmp9], %[ftmp9], %[ftmp7] \n\t"
949  "packushb %[ftmp0], %[ftmp8], %[ftmp9] \n\t"
950  MMI_SDC1(%[ftmp0], %[addr0], 0x08)
951 
 /* advance the base by the per-row increment and step to the next row */
952  "paddsh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
953  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
954  "daddiu %[tmp0], %[tmp0], -0x01 \n\t"
955  "bnez %[tmp0], 1b \n\t"
956  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
957  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
958  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
959  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
960  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
961  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
962  [tmp2]"=&r"(tmp[2]), [tmp3]"=&r"(tmp[3]),
963  [tmp4]"=&r"(tmp[4]), [tmp5]"=&r"(tmp[5]),
964  RESTRICT_ASM_ALL64
965  [addr0]"=&r"(addr[0])
966  : [src]"r"(src), [stride]"r"((mips_reg)stride),
967  [svq3]"r"(svq3), [rv40]"r"(rv40),
 /* NOTE(review): listing lines 968-971 are missing from this extract.
  * The template references ff_pw_m8tom5, ff_pw_m4tom1, ff_pw_1to4,
  * ff_pw_5to8, ff_pw_0to3, ff_pw_4to7, ff_pw_8tob and ff_pw_ctof, so
  * those input operands must be bound there. */
972  : "memory"
973  );
974 }
975 
/*
 * Body of ff_pred16x16_plane_h264_8_mmi(uint8_t *src, ptrdiff_t stride)
 * (name and signature taken from the cross-reference list at the end of this
 * listing; the signature line itself is missing here).
 * Calls the shared plane predictor with svq3 = 0 and rv40 = 0.
 */
977 {
978  pred16x16_plane_compat_mmi(src, stride, 0, 0);
979 }
980 
/*
 * Function body whose signature line (listing line 981) is missing from this
 * extract. NOTE(review): presumably ff_pred16x16_plane_svq3_8_mmi(uint8_t
 * *src, ptrdiff_t stride) -- confirm against the real file.
 * Calls the shared plane predictor with svq3 = 1 and rv40 = 0.
 */
982 {
983  pred16x16_plane_compat_mmi(src, stride, 1, 0);
984 }
985 
/*
 * Function body whose signature line (listing line 986) is missing from this
 * extract. NOTE(review): presumably ff_pred16x16_plane_rv40_8_mmi(uint8_t
 * *src, ptrdiff_t stride) -- confirm against the real file.
 * Calls the shared plane predictor with svq3 = 0 and rv40 = 1.
 */
987 {
988  pred16x16_plane_compat_mmi(src, stride, 0, 1);
989 }
#define mips_reg
Definition: asmdefs.h:44
void ff_pred8x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:713
const uint64_t ff_pw_ctof
Definition: constants.c:55
void ff_pred8x8_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:491
const uint64_t ff_pw_1to4
Definition: constants.c:50
const uint64_t ff_pw_2
Definition: constants.c:27
void ff_pred8x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:682
void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright, ptrdiff_t stride)
Definition: h264pred_mmi.c:155
static void pred16x16_plane_compat_mmi(uint8_t *src, int stride, const int svq3, const int rv40)
Definition: h264pred_mmi.c:746
uint8_t
void ff_pred16x16_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:97
#define PTR_ADDI
Definition: asmdefs.h:49
const uint64_t ff_pw_0to3
Definition: constants.c:52
void ff_pred4x4_dc_8_mmi(uint8_t *src, const uint8_t *topright, ptrdiff_t stride)
Definition: h264pred_mmi.c:434
#define U(x)
Definition: vp56_arith.h:37
#define src
Definition: vp8dsp.c:254
const uint64_t ff_pw_4to7
Definition: constants.c:53
void ff_pred8x8l_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright, ptrdiff_t stride)
Definition: h264pred_mmi.c:246
#define PTR_SUBU
Definition: asmdefs.h:50
void ff_pred16x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:60
void ff_pred16x16_plane_h264_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:976
void ff_pred8x8l_vertical_8_mmi(uint8_t *src, int has_topleft, int has_topright, ptrdiff_t stride)
Definition: h264pred_mmi.c:352
const uint64_t ff_pw_m8tom5
Definition: constants.c:48
#define src1
Definition: h264pred.c:139
void ff_pred16x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:30
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2]...the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so...,+,-,+,-,+,+,-,+,-,+,...hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32-hcoeff[1]-hcoeff[2]-...a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2}an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||.........intra?||||:Block01:yes no||||:Block02:.................||||:Block03::y DC::ref index:||||:Block04::cb DC::motion x:||||.........:cr DC::motion 
y:||||.................|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------------------------------|||Y subbands||Cb subbands||Cr subbands||||------||------||------|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||------||------||------||||------||------||------|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||------||------||------||||------||------||------|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||------||------||------||||------||------||------|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------------------------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction------------|\Dequantization-------------------\||Reference frames|\IDWT|--------------|Motion\|||Frame 0||Frame 1||Compensation.OBMC v-------|--------------|--------------.\------> Frame n output Frame Frame<----------------------------------/|...|-------------------Range Coder:============Binary Range Coder:-------------------The implemented range coder is an adapted version based upon"Range encoding: an algorithm for removing redundancy from a digitised message."by G.N.N.Martin.The symbols encoded by the Snow range coder are bits(0|1).The associated probabilities are not fix but change depending on the symbol mix seen so far.bit seen|new state---------+-----------------------------------------------0|256-state_transition_table[256-old_state];1|state_transition_table[old_state];state_transition_table={0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 
106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:-------------------------FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector Prediction:=========================1.the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff)*mv_scale Intra DC Prediction block[y][x] dc[1]
Definition: snow.txt:400
#define PTR_ADDIU
Definition: asmdefs.h:48
void ff_pred16x16_plane_rv40_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:986
#define src0
Definition: h264pred.c:138
const uint64_t ff_pw_5to8
Definition: constants.c:51
const uint64_t ff_pw_8tob
Definition: constants.c:54
const uint64_t ff_pb_1
Definition: constants.c:57
GLint GLenum GLboolean GLsizei stride
Definition: opengl_enc.c:104
void ff_pred8x8_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:576
__asm__(".macro parse_r var r\n\t""\\var = -1\n\t"_IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31)".iflt \\var\n\t"".error \"Unable to parse register name \\r\"\n\t"".endif\n\t"".endm")
const uint64_t ff_pw_m4tom1
Definition: constants.c:49
void ff_pred8x8_top_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:524
#define PTR_ADDU
Definition: asmdefs.h:47
#define stride
#define PTR_SRL
Definition: asmdefs.h:54
void ff_pred8x8_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:464
void ff_pred16x16_plane_svq3_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:981
static uint8_t tmp[11]
Definition: aes_ctr.c:26