h264pred_mmi.c
1 /*
2  * Loongson SIMD optimized h264pred
3  *
4  * Copyright (c) 2015 Loongson Technology Corporation Limited
5  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  * Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 #include "h264pred_mips.h"
26 
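/* 16x16 vertical prediction: the 16 pixels of the row just above the
 * block (src - stride) are loaded once into $f2/$f4 and copied into all
 * 16 rows.  A rough C sketch of the same operation (illustration only,
 * not part of the original file):
 *
 *     for (int y = 0; y < 16; y++)
 *         memcpy(src + y * stride, src - stride, 16);
 */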
27 void ff_pred16x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
28 {
29  __asm__ volatile (
30  "dli $8, 16 \r\n"
31  "gsldlc1 $f2, 7(%[srcA]) \r\n"
32  "gsldrc1 $f2, 0(%[srcA]) \r\n"
33  "gsldlc1 $f4, 15(%[srcA]) \r\n"
34  "gsldrc1 $f4, 8(%[srcA]) \r\n"
35  "1: \r\n"
36  "gssdlc1 $f2, 7(%[src]) \r\n"
37  "gssdrc1 $f2, 0(%[src]) \r\n"
38  "gssdlc1 $f4, 15(%[src]) \r\n"
39  "gssdrc1 $f4, 8(%[src]) \r\n"
40  "daddu %[src], %[src], %[stride] \r\n"
41  "daddi $8, $8, -1 \r\n"
42  "bnez $8, 1b \r\n"
43  : [src]"+&r"(src)
44  : [stride]"r"(stride),[srcA]"r"(src-stride)
45  : "$8","$f2","$f4"
46  );
47 }
48 
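/* 16x16 horizontal prediction: every row is filled with its left
 * neighbour src[-1].  The byte is splatted across a 64-bit word by
 * multiplying it with ff_pb_1 (0x0101010101010101); two such stores
 * cover one 16-pixel row.  Roughly equivalent C (illustration only):
 *
 *     for (int y = 0; y < 16; y++)
 *         memset(src + y * stride, src[y * stride - 1], 16);
 */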
49 void ff_pred16x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
50 {
51  __asm__ volatile (
52  "daddiu $2, %[src], -1 \r\n"
53  "daddu $3, %[src], $0 \r\n"
54  "dli $6, 0x10 \r\n"
55  "1: \r\n"
56  "lbu $4, 0($2) \r\n"
57  "dmul $5, $4, %[ff_pb_1] \r\n"
58  "sdl $5, 7($3) \r\n"
59  "sdr $5, 0($3) \r\n"
60  "sdl $5, 15($3) \r\n"
61  "sdr $5, 8($3) \r\n"
62  "daddu $2, %[stride] \r\n"
63  "daddu $3, %[stride] \r\n"
64  "daddiu $6, -1 \r\n"
65  "bnez $6, 1b \r\n"
66  ::[src]"r"(src),[stride]"r"(stride),[ff_pb_1]"r"(ff_pb_1)
67  : "$2","$3","$4","$5","$6"
68  );
69 }
70 
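/* 16x16 DC prediction: the first loop sums the 16 left neighbours, the
 * second sums the 16 top neighbours, then
 *     dc = (sum_left + sum_top + 16) >> 5
 * is splatted with ff_pb_1 and written to all 16 rows. */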
71 void ff_pred16x16_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
72 {
73  __asm__ volatile (
74  "daddiu $2, %[src], -1 \r\n"
75  "dli $6, 0x10 \r\n"
76  "xor $8, $8, $8 \r\n"
77  "1: \r\n"
78  "lbu $4, 0($2) \r\n"
79  "daddu $8, $8, $4 \r\n"
80  "daddu $2, $2, %[stride] \r\n"
81  "daddiu $6, $6, -1 \r\n"
82  "bnez $6, 1b \r\n"
83  "dli $6, 0x10 \r\n"
84  "negu $3, %[stride] \r\n"
85  "daddu $2, %[src], $3 \r\n"
86  "2: \r\n"
87  "lbu $4, 0($2) \r\n"
88  "daddu $8, $8, $4 \r\n"
89  "daddiu $2, $2, 1 \r\n"
90  "daddiu $6, $6, -1 \r\n"
91  "bnez $6, 2b \r\n"
92  "daddiu $8, $8, 0x10 \r\n"
93  "dsra $8, 5 \r\n"
94  "dmul $5, $8, %[ff_pb_1] \r\n"
95  "daddu $2, %[src], $0 \r\n"
96  "dli $6, 0x10 \r\n"
97  "3: \r\n"
98  "sdl $5, 7($2) \r\n"
99  "sdr $5, 0($2) \r\n"
100  "sdl $5, 15($2) \r\n"
101  "sdr $5, 8($2) \r\n"
102  "daddu $2, $2, %[stride] \r\n"
103  "daddiu $6, $6, -1 \r\n"
104  "bnez $6, 3b \r\n"
105  ::[src]"r"(src),[stride]"r"(stride),[ff_pb_1]"r"(ff_pb_1)
106  : "$2","$3","$4","$5","$6","$8"
107  );
108 }
109 
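/* 8x8 luma ("l") top DC prediction: the eight top neighbours are
 * low-pass filtered with (left + 2*cur + right + 2) >> 2, falling back
 * to the edge sample when has_topleft/has_topright is 0, the filtered
 * values are summed, and
 *     dc = (sum + 4) >> 3
 * fills the whole 8x8 block. */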
110 void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft,
111  int has_topright, ptrdiff_t stride)
112 {
113  uint32_t dc;
114 
115  __asm__ volatile (
116  "ldl $8, 7(%[srcA]) \r\n"
117  "ldr $8, 0(%[srcA]) \r\n"
118  "ldl $9, 7(%[src0]) \r\n"
119  "ldr $9, 0(%[src0]) \r\n"
120  "ldl $10, 7(%[src1]) \r\n"
121  "ldr $10, 0(%[src1]) \r\n"
122  "dmtc1 $8, $f2 \r\n"
123  "dmtc1 $9, $f4 \r\n"
124  "dmtc1 $10, $f6 \r\n"
125  "dmtc1 $0, $f0 \r\n"
126  "punpcklbh $f8, $f2, $f0 \r\n"
127  "punpckhbh $f10, $f2, $f0 \r\n"
128  "punpcklbh $f12, $f4, $f0 \r\n"
129  "punpckhbh $f14, $f4, $f0 \r\n"
130  "punpcklbh $f16, $f6, $f0 \r\n"
131  "punpckhbh $f18, $f6, $f0 \r\n"
132  "bnez %[has_topleft], 1f \r\n"
133  "pinsrh_0 $f8, $f8, $f12 \r\n"
134  "1: \r\n"
135  "bnez %[has_topright], 2f \r\n"
136  "pinsrh_3 $f18, $f18, $f14 \r\n"
137  "2: \r\n"
138  "daddiu $8, $0, 2 \r\n"
139  "dmtc1 $8, $f20 \r\n"
140  "pshufh $f22, $f20, $f0 \r\n"
141  "pmullh $f12, $f12, $f22 \r\n"
142  "pmullh $f14, $f14, $f22 \r\n"
143  "paddh $f8, $f8, $f12 \r\n"
144  "paddh $f10, $f10, $f14 \r\n"
145  "paddh $f8, $f8, $f16 \r\n"
146  "paddh $f10, $f10, $f18 \r\n"
147  "paddh $f8, $f8, $f22 \r\n"
148  "paddh $f10, $f10, $f22 \r\n"
149  "psrah $f8, $f8, $f20 \r\n"
150  "psrah $f10, $f10, $f20 \r\n"
151  "packushb $f4, $f8, $f10 \r\n"
152  "biadd $f2, $f4 \r\n"
153  "mfc1 $9, $f2 \r\n"
154  "addiu $9, $9, 4 \r\n"
155  "dsrl $9, $9, 3 \r\n"
156  "mul %[dc], $9, %[ff_pb_1] \r\n"
157  : [dc]"=r"(dc)
158  : [srcA]"r"(src-stride-1),[src0]"r"(src-stride),
159  [src1]"r"(src-stride+1),[has_topleft]"r"(has_topleft),
160  [has_topright]"r"(has_topright),[ff_pb_1]"r"(ff_pb_1)
161  : "$8","$9","$10","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16",
162  "$f18","$f20","$f22"
163  );
164 
165  __asm__ volatile (
166  "dli $8, 8 \r\n"
167  "1: \r\n"
168  "punpcklwd $f2, %[dc], %[dc] \r\n"
169  "gssdlc1 $f2, 7(%[src]) \r\n"
170  "gssdrc1 $f2, 0(%[src]) \r\n"
171  "daddu %[src], %[src], %[stride] \r\n"
172  "daddi $8, $8, -1 \r\n"
173  "bnez $8, 1b \r\n"
174  : [src]"+&r"(src)
175  : [dc]"f"(dc),[stride]"r"(stride)
176  : "$8","$f2"
177  );
178 }
179 
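/* 8x8 luma DC prediction: l0..l7 below are the 121-filtered left
 * neighbours (computed in C), dc2 is the sum of the 121-filtered top
 * neighbours (computed in the first asm block), and the block is
 * filled with
 *     dc = ((dc1 + dc2 + 8) >> 4) * 0x01010101
 * where dc1 = l0 + ... + l7. */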
180 void ff_pred8x8l_dc_8_mmi(uint8_t *src, int has_topleft,
181  int has_topright, ptrdiff_t stride)
182 {
183  uint32_t dc, dc1, dc2;
184 
185  const int l0 = ((has_topleft ? src[-1+-1*stride] : src[-1+0*stride]) + 2*src[-1+0*stride] + src[-1+1*stride] + 2) >> 2;
186  const int l1 = (src[-1+0*stride] + 2*src[-1+1*stride] + src[-1+2*stride] + 2) >> 2;
187  const int l2 = (src[-1+1*stride] + 2*src[-1+2*stride] + src[-1+3*stride] + 2) >> 2;
188  const int l3 = (src[-1+2*stride] + 2*src[-1+3*stride] + src[-1+4*stride] + 2) >> 2;
189  const int l4 = (src[-1+3*stride] + 2*src[-1+4*stride] + src[-1+5*stride] + 2) >> 2;
190  const int l5 = (src[-1+4*stride] + 2*src[-1+5*stride] + src[-1+6*stride] + 2) >> 2;
191  const int l6 = (src[-1+5*stride] + 2*src[-1+6*stride] + src[-1+7*stride] + 2) >> 2;
192  const int l7 = (src[-1+6*stride] + 2*src[-1+7*stride] + src[-1+7*stride] + 2) >> 2;
193 
194  __asm__ volatile (
195  "ldl $8, 7(%[srcA]) \r\n"
196  "ldr $8, 0(%[srcA]) \r\n"
197  "ldl $9, 7(%[src0]) \r\n"
198  "ldr $9, 0(%[src0]) \r\n"
199  "ldl $10, 7(%[src1]) \r\n"
200  "ldr $10, 0(%[src1]) \r\n"
201  "dmtc1 $8, $f2 \r\n"
202  "dmtc1 $9, $f4 \r\n"
203  "dmtc1 $10, $f6 \r\n"
204  "dmtc1 $0, $f0 \r\n"
205  "punpcklbh $f8, $f2, $f0 \r\n"
206  "punpckhbh $f10, $f2, $f0 \r\n"
207  "punpcklbh $f12, $f4, $f0 \r\n"
208  "punpckhbh $f14, $f4, $f0 \r\n"
209  "punpcklbh $f16, $f6, $f0 \r\n"
210  "punpckhbh $f18, $f6, $f0 \r\n"
211  "daddiu $8, $0, 3 \r\n"
212  "dmtc1 $8, $f20 \r\n"
213  "pshufh $f28, $f10, $f20 \r\n"
214  "pshufh $f30, $f18, $f20 \r\n"
215  "pinsrh_3 $f10, $f10, $f30 \r\n"
216  "pinsrh_3 $f18, $f18, $f28 \r\n"
217  "bnez %[has_topleft], 1f \r\n"
218  "pinsrh_0 $f8, $f8, $f12 \r\n"
219  "1: \r\n"
220  "bnez %[has_topright], 2f \r\n"
221  "pshufh $f30, $f14, $f20 \r\n"
222  "pinsrh_3 $f10, $f10, $f30 \r\n"
223  "2: \r\n"
224  "daddiu $8, $0, 2 \r\n"
225  "dmtc1 $8, $f20 \r\n"
226  "pshufh $f22, $f20, $f0 \r\n"
227  "pmullh $f12, $f12, $f22 \r\n"
228  "pmullh $f14, $f14, $f22 \r\n"
229  "paddh $f8, $f8, $f12 \r\n"
230  "paddh $f10, $f10, $f14 \r\n"
231  "paddh $f8, $f8, $f16 \r\n"
232  "paddh $f10, $f10, $f18 \r\n"
233  "paddh $f8, $f8, $f22 \r\n"
234  "paddh $f10, $f10, $f22 \r\n"
235  "psrah $f8, $f8, $f20 \r\n"
236  "psrah $f10, $f10, $f20 \r\n"
237  "packushb $f4, $f8, $f10 \r\n"
238  "biadd $f2, $f4 \r\n"
239  "mfc1 %[dc2], $f2 \r\n"
240  : [dc2]"=r"(dc2)
241  : [srcA]"r"(src-stride-1),[src0]"r"(src-stride),
242  [src1]"r"(src-stride+1),[has_topleft]"r"(has_topleft),
243  [has_topright]"r"(has_topright)
244  : "$8","$9","$10","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16",
245  "$f18","$f20","$f22"
246  );
247 
248  dc1 = l0+l1+l2+l3+l4+l5+l6+l7;
249  dc = ((dc1+dc2+8)>>4)*0x01010101U;
250 
251  __asm__ volatile (
252  "dli $8, 8 \r\n"
253  "1: \r\n"
254  "punpcklwd $f2, %[dc], %[dc] \r\n"
255  "gssdlc1 $f2, 7(%[src]) \r\n"
256  "gssdrc1 $f2, 0(%[src]) \r\n"
257  "daddu %[src], %[src], %[stride] \r\n"
258  "daddi $8, $8, -1 \r\n"
259  "bnez $8, 1b \r\n"
260  : [src]"+&r"(src)
261  : [dc]"f"(dc),[stride]"r"(stride)
262  : "$8","$f2"
263  );
264 }
265 
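/* 8x8 luma vertical prediction: the top neighbours are 121-filtered as
 * in ff_pred8x8l_top_dc_8_mmi, but instead of averaging them the eight
 * filtered bytes are stored as the first row and then copied into the
 * remaining seven rows. */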
266 void ff_pred8x8l_vertical_8_mmi(uint8_t *src, int has_topleft,
267  int has_topright, ptrdiff_t stride)
268 {
269  __asm__ volatile (
270  "ldl $8, 7(%[srcA]) \r\n"
271  "ldr $8, 0(%[srcA]) \r\n"
272  "ldl $9, 7(%[src0]) \r\n"
273  "ldr $9, 0(%[src0]) \r\n"
274  "ldl $10, 7(%[src1]) \r\n"
275  "ldr $10, 0(%[src1]) \r\n"
276  "dmtc1 $8, $f2 \r\n"
277  "dmtc1 $9, $f4 \r\n"
278  "dmtc1 $10, $f6 \r\n"
279  "dmtc1 $0, $f0 \r\n"
280  "punpcklbh $f8, $f2, $f0 \r\n"
281  "punpckhbh $f10, $f2, $f0 \r\n"
282  "punpcklbh $f12, $f4, $f0 \r\n"
283  "punpckhbh $f14, $f4, $f0 \r\n"
284  "punpcklbh $f16, $f6, $f0 \r\n"
285  "punpckhbh $f18, $f6, $f0 \r\n"
286  "bnez %[has_topleft], 1f \r\n"
287  "pinsrh_0 $f8, $f8, $f12 \r\n"
288  "1: \r\n"
289  "bnez %[has_topright], 2f \r\n"
290  "pinsrh_3 $f18, $f18, $f14 \r\n"
291  "2: \r\n"
292  "daddiu $8, $0, 2 \r\n"
293  "dmtc1 $8, $f20 \r\n"
294  "pshufh $f22, $f20, $f0 \r\n"
295  "pmullh $f12, $f12, $f22 \r\n"
296  "pmullh $f14, $f14, $f22 \r\n"
297  "paddh $f8, $f8, $f12 \r\n"
298  "paddh $f10, $f10, $f14 \r\n"
299  "paddh $f8, $f8, $f16 \r\n"
300  "paddh $f10, $f10, $f18 \r\n"
301  "paddh $f8, $f8, $f22 \r\n"
302  "paddh $f10, $f10, $f22 \r\n"
303  "psrah $f8, $f8, $f20 \r\n"
304  "psrah $f10, $f10, $f20 \r\n"
305  "packushb $f4, $f8, $f10 \r\n"
306  "sdc1 $f4, 0(%[src]) \r\n"
307  : [src]"=r"(src)
308  : [srcA]"r"(src-stride-1),[src0]"r"(src-stride),
309  [src1]"r"(src-stride+1),[has_topleft]"r"(has_topleft),
310  [has_topright]"r"(has_topright)
311  : "$8","$9","$10","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16",
312  "$f18","$f20","$f22"
313  );
314 
315  __asm__ volatile (
316  "dli $8, 7 \r\n"
317  "gsldlc1 $f2, 7(%[src]) \r\n"
318  "gsldrc1 $f2, 0(%[src]) \r\n"
319  "dadd %[src], %[src], %[stride] \r\n"
320  "1: \r\n"
321  "gssdlc1 $f2, 7(%[src]) \r\n"
322  "gssdrc1 $f2, 0(%[src]) \r\n"
323  "daddu %[src], %[src], %[stride] \r\n"
324  "daddi $8, $8, -1 \r\n"
325  "bnez $8, 1b \r\n"
326  : [src]"+&r"(src)
327  : [stride]"r"(stride)
328  : "$8","$f2"
329  );
330 }
331 
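/* 4x4 DC prediction: dc = (4 top + 4 left neighbours + 4) >> 3 is
 * computed in C, splatted to a 32-bit word with ff_pb_1, and stored
 * into the four rows with gsswx. */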
332 void ff_pred4x4_dc_8_mmi(uint8_t *src, const uint8_t *topright,
333  ptrdiff_t stride)
334 {
335  const int dc = (src[-stride] + src[1-stride] + src[2-stride]
336  + src[3-stride] + src[-1+0*stride] + src[-1+1*stride]
337  + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
338 
339  __asm__ volatile (
340  "daddu $2, %[dc], $0 \r\n"
341  "dmul $3, $2, %[ff_pb_1] \r\n"
342  "xor $4, $4, $4 \r\n"
343  "gsswx $3, 0(%[src],$4) \r\n"
344  "daddu $4, %[stride] \r\n"
345  "gsswx $3, 0(%[src],$4) \r\n"
346  "daddu $4, %[stride] \r\n"
347  "gsswx $3, 0(%[src],$4) \r\n"
348  "daddu $4, %[stride] \r\n"
349  "gsswx $3, 0(%[src],$4) \r\n"
350  ::[src]"r"(src),[stride]"r"(stride),[dc]"r"(dc),[ff_pb_1]"r"(ff_pb_1)
351  : "$2","$3","$4"
352  );
353 }
354 
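/* 8x8 vertical prediction: copy the 8 bytes above the block
 * (src - stride) into each of the 8 rows. */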
355 void ff_pred8x8_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
356 {
357  __asm__ volatile (
358  "dsubu $2, %[src], %[stride] \r\n"
359  "daddu $3, %[src], $0 \r\n"
360  "ldl $4, 7($2) \r\n"
361  "ldr $4, 0($2) \r\n"
362  "dli $5, 0x8 \r\n"
363  "1: \r\n"
364  "sdl $4, 7($3) \r\n"
365  "sdr $4, 0($3) \r\n"
366  "daddu $3, %[stride] \r\n"
367  "daddiu $5, -1 \r\n"
368  "bnez $5, 1b \r\n"
369  ::[src]"r"(src),[stride]"r"(stride)
370  : "$2","$3","$4","$5"
371  );
372 }
373 
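/* 8x8 horizontal prediction: fill each of the 8 rows with its left
 * neighbour src[-1], splatted across 8 bytes with ff_pb_1. */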
374 void ff_pred8x8_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
375 {
376  __asm__ volatile (
377  "daddiu $2, %[src], -1 \r\n"
378  "daddu $3, %[src], $0 \r\n"
379  "dli $6, 0x8 \r\n"
380  "1: \r\n"
381  "lbu $4, 0($2) \r\n"
382  "dmul $5, $4, %[ff_pb_1] \r\n"
383  "sdl $5, 7($3) \r\n"
384  "sdr $5, 0($3) \r\n"
385  "daddu $2, %[stride] \r\n"
386  "daddu $3, %[stride] \r\n"
387  "daddiu $6, -1 \r\n"
388  "bnez $6, 1b \r\n"
389  ::[src]"r"(src),[stride]"r"(stride),[ff_pb_1]"r"(ff_pb_1)
390  : "$2","$3","$4","$5","$6"
391  );
392 }
393 
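/* 16x16 plane prediction shared by the H.264, SVQ3 and RV40 variants.
 * The horizontal and vertical gradients are built from the top row and
 * left column using the weight tables ff_pw_m8tom5, ff_pw_m4tom1,
 * ff_pw_1to4 and ff_pw_5to8 (weights -8..-5, -4..-1, 1..4, 5..8), then
 * scaled according to the svq3/rv40 flags (plain H.264 uses
 * (5*grad + 32) >> 6), and the block is filled with the resulting
 * linear ramp using ff_pw_0to3 .. ff_pw_ctof as per-column offsets,
 * saturated to 0..255 by packushb. */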
394 static void ff_pred16x16_plane_compat_8_mmi(uint8_t *src, ptrdiff_t stride,
395  const int svq3, const int rv40)
396 {
397  __asm__ volatile (
398  "negu $2, %[stride] \r\n"
399  "daddu $3, %[src], $2 \r\n"
400  "xor $f8, $f8, $f8 \r\n"
401  "gslwlc1 $f0, 2($3) \r\n"
402  "gslwrc1 $f0, -1($3) \r\n"
403  "gslwlc1 $f2, 6($3) \r\n"
404  "gslwrc1 $f2, 3($3) \r\n"
405  "gslwlc1 $f4, 11($3) \r\n"
406  "gslwrc1 $f4, 8($3) \r\n"
407  "gslwlc1 $f6, 15($3) \r\n"
408  "gslwrc1 $f6, 12($3) \r\n"
409  "punpcklbh $f0, $f0, $f8 \r\n"
410  "punpcklbh $f2, $f2, $f8 \r\n"
411  "punpcklbh $f4, $f4, $f8 \r\n"
412  "punpcklbh $f6, $f6, $f8 \r\n"
413  "dmtc1 %[ff_pw_m8tom5], $f20 \r\n"
414  "dmtc1 %[ff_pw_m4tom1], $f22 \r\n"
415  "dmtc1 %[ff_pw_1to4], $f24 \r\n"
416  "dmtc1 %[ff_pw_5to8], $f26 \r\n"
417  "pmullh $f0, $f0, $f20 \r\n"
418  "pmullh $f2, $f2, $f22 \r\n"
419  "pmullh $f4, $f4, $f24 \r\n"
420  "pmullh $f6, $f6, $f26 \r\n"
421  "paddsh $f0, $f0, $f4 \r\n"
422  "paddsh $f2, $f2, $f6 \r\n"
423  "paddsh $f0, $f0, $f2 \r\n"
424  "dli $4, 0xE \r\n"
425  "dmtc1 $4, $f28 \r\n"
426  "pshufh $f2, $f0, $f28 \r\n"
427  "paddsh $f0, $f0, $f2 \r\n"
428  "dli $4, 0x1 \r\n"
429  "dmtc1 $4, $f30 \r\n"
430  "pshufh $f2, $f0, $f30 \r\n"
431  "paddsh $f10, $f0, $f2 \r\n"
432  "daddiu $3, %[src], -1 \r\n"
433  "daddu $3, $2 \r\n"
434  "lbu $4, 0($3) \r\n"
435  "lbu $8, 16($3) \r\n"
436  "daddu $3, %[stride] \r\n"
437  "lbu $5, 0($3) \r\n"
438  "daddu $3, %[stride] \r\n"
439  "lbu $6, 0($3) \r\n"
440  "daddu $3, %[stride] \r\n"
441  "lbu $7, 0($3) \r\n"
442  "dsll $5, 16 \r\n"
443  "dsll $6, 32 \r\n"
444  "dsll $7, 48 \r\n"
445  "or $6, $7 \r\n"
446  "or $4, $5 \r\n"
447  "or $4, $6 \r\n"
448  "dmtc1 $4, $f0 \r\n"
449  "daddu $3, %[stride] \r\n"
450  "lbu $4, 0($3) \r\n"
451  "daddu $3, %[stride] \r\n"
452  "lbu $5, 0($3) \r\n"
453  "daddu $3, %[stride] \r\n"
454  "lbu $6, 0($3) \r\n"
455  "daddu $3, %[stride] \r\n"
456  "lbu $7, 0($3) \r\n"
457  "dsll $5, 16 \r\n"
458  "dsll $6, 32 \r\n"
459  "dsll $7, 48 \r\n"
460  "or $6, $7 \r\n"
461  "or $4, $5 \r\n"
462  "or $4, $6 \r\n"
463  "dmtc1 $4, $f2 \r\n"
464  "daddu $3, %[stride] \r\n"
465  "daddu $3, %[stride] \r\n"
466  "lbu $4, 0($3) \r\n"
467  "daddu $3, %[stride] \r\n"
468  "lbu $5, 0($3) \r\n"
469  "daddu $3, %[stride] \r\n"
470  "lbu $6, 0($3) \r\n"
471  "daddu $3, %[stride] \r\n"
472  "lbu $7, 0($3) \r\n"
473  "dsll $5, 16 \r\n"
474  "dsll $6, 32 \r\n"
475  "dsll $7, 48 \r\n"
476  "or $6, $7 \r\n"
477  "or $4, $5 \r\n"
478  "or $4, $6 \r\n"
479  "dmtc1 $4, $f4 \r\n"
480  "daddu $3, %[stride] \r\n"
481  "lbu $4, 0($3) \r\n"
482  "daddu $3, %[stride] \r\n"
483  "lbu $5, 0($3) \r\n"
484  "daddu $3, %[stride] \r\n"
485  "lbu $6, 0($3) \r\n"
486  "daddu $3, %[stride] \r\n"
487  "lbu $7, 0($3) \r\n"
488  "daddu $8, $7 \r\n"
489  "daddiu $8, 1 \r\n"
490  "dsll $8, 4 \r\n"
491  "dsll $5, 16 \r\n"
492  "dsll $6, 32 \r\n"
493  "dsll $7, 48 \r\n"
494  "or $6, $7 \r\n"
495  "or $4, $5 \r\n"
496  "or $4, $6 \r\n"
497  "dmtc1 $4, $f6 \r\n"
498  "pmullh $f0, $f0, $f20 \r\n"
499  "pmullh $f2, $f2, $f22 \r\n"
500  "pmullh $f4, $f4, $f24 \r\n"
501  "pmullh $f6, $f6, $f26 \r\n"
502  "paddsh $f0, $f0, $f4 \r\n"
503  "paddsh $f2, $f2, $f6 \r\n"
504  "paddsh $f0, $f0, $f2 \r\n"
505  "pshufh $f2, $f0, $f28 \r\n"
506  "paddsh $f0, $f0, $f2 \r\n"
507  "pshufh $f2, $f0, $f30 \r\n"
508  "paddsh $f12, $f0, $f2 \r\n"
509  "dmfc1 $2, $f10 \r\n"
510  "dsll $2, 48 \r\n"
511  "dsra $2, 48 \r\n"
512  "dmfc1 $3, $f12 \r\n"
513  "dsll $3, 48 \r\n"
514  "dsra $3, 48 \r\n"
515  "beqz %[svq3], 1f \r\n"
516  "dli $4, 4 \r\n"
517  "ddiv $2, $4 \r\n"
518  "ddiv $3, $4 \r\n"
519  "dli $4, 5 \r\n"
520  "dmul $2, $4 \r\n"
521  "dmul $3, $4 \r\n"
522  "dli $4, 16 \r\n"
523  "ddiv $2, $4 \r\n"
524  "ddiv $3, $4 \r\n"
525  "daddu $4, $2, $0 \r\n"
526  "daddu $2, $3, $0 \r\n"
527  "daddu $3, $4, $0 \r\n"
528  "b 2f \r\n"
529  "1: \r\n"
530  "beqz %[rv40], 1f \r\n"
531  "dsra $4, $2, 2 \r\n"
532  "daddu $2, $4 \r\n"
533  "dsra $4, $3, 2 \r\n"
534  "daddu $3, $4 \r\n"
535  "dsra $2, 4 \r\n"
536  "dsra $3, 4 \r\n"
537  "b 2f \r\n"
538  "1: \r\n"
539  "dli $4, 5 \r\n"
540  "dmul $2, $4 \r\n"
541  "dmul $3, $4 \r\n"
542  "daddiu $2, 32 \r\n"
543  "daddiu $3, 32 \r\n"
544  "dsra $2, 6 \r\n"
545  "dsra $3, 6 \r\n"
546  "2: \r\n"
547  "daddu $5, $2, $3 \r\n"
548  "dli $4, 7 \r\n"
549  "dmul $5, $4 \r\n"
550  "dsubu $8, $5 \r\n"
551  "dmtc1 $0, $f8 \r\n"
552  "dmtc1 $2, $f0 \r\n"
553  "pshufh $f0, $f0, $f8 \r\n"
554  "dmtc1 $3, $f10 \r\n"
555  "pshufh $f10, $f10, $f8 \r\n"
556  "dmtc1 $8, $f12 \r\n"
557  "pshufh $f12, $f12, $f8 \r\n"
558  "dli $4, 5 \r\n"
559  "dmtc1 $4, $f14 \r\n"
560  "pmullh $f2, %[ff_pw_0to3], $f0 \r\n"
561  "pmullh $f4, %[ff_pw_4to7], $f0 \r\n"
562  "pmullh $f6, %[ff_pw_8tob], $f0 \r\n"
563  "pmullh $f8, %[ff_pw_ctof], $f0 \r\n"
564  "daddu $3, %[src], $0 \r\n"
565  "dli $2, 16 \r\n"
566  "1: \r\n"
567  "paddsh $f16, $f2, $f12 \r\n"
568  "psrah $f16, $f16, $f14 \r\n"
569  "paddsh $f18, $f4, $f12 \r\n"
570  "psrah $f18, $f18, $f14 \r\n"
571  "packushb $f20, $f16, $f18 \r\n"
572  "gssdlc1 $f20, 7($3) \r\n"
573  "gssdrc1 $f20, 0($3) \r\n"
574  "paddsh $f16, $f6, $f12 \r\n"
575  "psrah $f16, $f16, $f14 \r\n"
576  "paddsh $f18, $f8, $f12 \r\n"
577  "psrah $f18, $f18, $f14 \r\n"
578  "packushb $f20, $f16, $f18 \r\n"
579  "gssdlc1 $f20, 15($3) \r\n"
580  "gssdrc1 $f20, 8($3) \r\n"
581  "paddsh $f12, $f12, $f10 \r\n"
582  "daddu $3, %[stride] \r\n"
583  "daddiu $2, -1 \r\n"
584  "bnez $2, 1b \r\n"
585  ::[src]"r"(src),[stride]"r"(stride),[svq3]"r"(svq3),[rv40]"r"(rv40),
586  [ff_pw_m8tom5]"r"(ff_pw_m8tom5),[ff_pw_m4tom1]"r"(ff_pw_m4tom1),
587  [ff_pw_1to4]"r"(ff_pw_1to4),[ff_pw_5to8]"r"(ff_pw_5to8),
588  [ff_pw_0to3]"f"(ff_pw_0to3),[ff_pw_4to7]"f"(ff_pw_4to7),
589  [ff_pw_8tob]"f"(ff_pw_8tob),[ff_pw_ctof]"f"(ff_pw_ctof)
590  : "$2","$3","$4","$5","$6","$7","$8","$f0","$f2","$f4","$f6","$f8",
591  "$f10","$f12","$f14","$f16","$f18","$f20","$f22","$f24","$f26",
592  "$f28","$f30"
593  );
594 }
595 
596 void ff_pred16x16_plane_svq3_8_mmi(uint8_t *src, ptrdiff_t stride)
597 {
598  ff_pred16x16_plane_compat_8_mmi(src, stride, 1, 0);
599 }
600 
601 void ff_pred16x16_plane_rv40_8_mmi(uint8_t *src, ptrdiff_t stride)
602 {
603  ff_pred16x16_plane_compat_8_mmi(src, stride, 0, 1);
604 }
605 
606 void ff_pred16x16_plane_h264_8_mmi(uint8_t *src, ptrdiff_t stride)
607 {
608  ff_pred16x16_plane_compat_8_mmi(src, stride, 0, 0);
609 }
610 
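/* 8x8 top DC prediction (chroma): the left and right halves of the top
 * row are averaged separately,
 *     dc0 = (top[0] + top[1] + top[2] + top[3] + 2) >> 2
 *     dc1 = (top[4] + top[5] + top[6] + top[7] + 2) >> 2
 * and each 4-pixel wide half of all 8 rows is filled with its dc. */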
611 void ff_pred8x8_top_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
612 {
613  __asm__ volatile (
614  "dli $2, 2 \r\n"
615  "xor $f0, $f0, $f0 \r\n"
616  "xor $f2, $f2, $f2 \r\n"
617  "xor $f30, $f30, $f30 \r\n"
618  "negu $3, %[stride] \r\n"
619  "daddu $3, $3, %[src] \r\n"
620  "gsldlc1 $f4, 7($3) \r\n"
621  "gsldrc1 $f4, 0($3) \r\n"
622  "punpcklbh $f0, $f4, $f30 \r\n"
623  "punpckhbh $f2, $f4, $f30 \r\n"
624  "biadd $f0, $f0 \r\n"
625  "biadd $f2, $f2 \r\n"
626  "pshufh $f0, $f0, $f30 \r\n"
627  "pshufh $f2, $f2, $f30 \r\n"
628  "dmtc1 $2, $f4 \r\n"
629  "pshufh $f4, $f4, $f30 \r\n"
630  "paddush $f0, $f0, $f4 \r\n"
631  "paddush $f2, $f2, $f4 \r\n"
632  "dmtc1 $2, $f4 \r\n"
633  "psrlh $f0, $f0, $f4 \r\n"
634  "psrlh $f2, $f2, $f4 \r\n"
635  "packushb $f4, $f0, $f2 \r\n"
636  "dli $2, 8 \r\n"
637  "1: \r\n"
638  "gssdlc1 $f4, 7(%[src]) \r\n"
639  "gssdrc1 $f4, 0(%[src]) \r\n"
640  "daddu %[src], %0, %[stride] \r\n"
641  "daddiu $2, $2, -1 \r\n"
642  "bnez $2, 1b \r\n"
643  ::[src]"r"(src),[stride]"r"(stride)
644  : "$2","$3","$f0","$f2","$f4","$f30"
645  );
646 }
647 
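/* 8x8 DC prediction (chroma): the block is split into four 4x4
 * quadrants.  With t0/t1 the sums of the left/right halves of the top
 * row and l0/l1 the sums of the upper/lower halves of the left column:
 *     dc00 = (t0 + l0 + 4) >> 3     dc01 = (t1 + 2) >> 2
 *     dc10 = (l1 + 2) >> 2          dc11 = (t1 + l1 + 4) >> 3
 * and each quadrant is filled with its own dc. */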
648 void ff_pred8x8_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
649 {
650  __asm__ volatile (
651  "negu $2, %[stride] \r\n"
652  "daddu $2, $2, %[src] \r\n"
653  "daddiu $5, $2, 4 \r\n"
654  "lbu $6, 0($2) \r\n"
655  "daddu $3, $0, $6 \r\n"
656  "daddiu $2, 1 \r\n"
657  "lbu $6, 0($5) \r\n"
658  "daddu $4, $0, $6 \r\n"
659  "daddiu $5, 1 \r\n"
660  "lbu $6, 0($2) \r\n"
661  "daddu $3, $3, $6 \r\n"
662  "daddiu $2, 1 \r\n"
663  "lbu $6, 0($5) \r\n"
664  "daddu $4, $4, $6 \r\n"
665  "daddiu $5, 1 \r\n"
666  "lbu $6, 0($2) \r\n"
667  "daddu $3, $3, $6 \r\n"
668  "daddiu $2, 1 \r\n"
669  "lbu $6, 0($5) \r\n"
670  "daddu $4, $4, $6 \r\n"
671  "daddiu $5, 1 \r\n"
672  "lbu $6, 0($2) \r\n"
673  "daddu $3, $3, $6 \r\n"
674  "daddiu $2, 1 \r\n"
675  "lbu $6, 0($5) \r\n"
676  "daddu $4, $4, $6 \r\n"
677  "daddiu $5, 1 \r\n"
678  "dli $6, -1 \r\n"
679  "daddu $6, $6, %[src] \r\n"
680  "lbu $5, 0($6) \r\n"
681  "daddu $7, $0, $5 \r\n"
682  "daddu $6, $6, %[stride] \r\n"
683  "lbu $5, 0($6) \r\n"
684  "daddu $7, $7, $5 \r\n"
685  "daddu $6, $6, %[stride] \r\n"
686  "lbu $5, 0($6) \r\n"
687  "daddu $7, $7, $5 \r\n"
688  "daddu $6, $6, %[stride] \r\n"
689  "lbu $5, 0($6) \r\n"
690  "daddu $7, $7, $5 \r\n"
691  "daddu $6, $6, %[stride] \r\n"
692  "lbu $5, 0($6) \r\n"
693  "daddu $8, $0, $5 \r\n"
694  "daddu $6, $6, %[stride] \r\n"
695  "lbu $5, 0($6) \r\n"
696  "daddu $8, $8, $5 \r\n"
697  "daddu $6, $6, %[stride] \r\n"
698  "lbu $5, 0($6) \r\n"
699  "daddu $8, $8, $5 \r\n"
700  "daddu $6, $6, %[stride] \r\n"
701  "lbu $5, 0($6) \r\n"
702  "daddu $8, $8, $5 \r\n"
703  "daddu $3, $3, $7 \r\n"
704  "daddiu $3, $3, 4 \r\n"
705  "daddiu $4, $4, 2 \r\n"
706  "daddiu $5, $8, 2 \r\n"
707  "daddu $6, $4, $5 \r\n"
708  "dsrl $3, 3 \r\n"
709  "dsrl $4, 2 \r\n"
710  "dsrl $5, 2 \r\n"
711  "dsrl $6, 3 \r\n"
712  "xor $f30, $f30, $f30 \r\n"
713  "dmtc1 $3, $f0 \r\n"
714  "pshufh $f0, $f0, $f30 \r\n"
715  "dmtc1 $4, $f2 \r\n"
716  "pshufh $f2, $f2, $f30 \r\n"
717  "dmtc1 $5, $f4 \r\n"
718  "pshufh $f4, $f4, $f30 \r\n"
719  "dmtc1 $6, $f6 \r\n"
720  "pshufh $f6, $f6, $f30 \r\n"
721  "packushb $f0, $f0, $f2 \r\n"
722  "packushb $f2, $f4, $f6 \r\n"
723  "daddu $2, $0, %[src] \r\n"
724  "sdc1 $f0, 0($2) \r\n"
725  "daddu $2, $2, %[stride] \r\n"
726  "sdc1 $f0, 0($2) \r\n"
727  "daddu $2, $2, %[stride] \r\n"
728  "sdc1 $f0, 0($2) \r\n"
729  "daddu $2, $2, %[stride] \r\n"
730  "sdc1 $f0, 0($2) \r\n"
731  "daddu $2, $2, %[stride] \r\n"
732  "sdc1 $f2, 0($2) \r\n"
733  "daddu $2, $2, %[stride] \r\n"
734  "sdc1 $f2, 0($2) \r\n"
735  "daddu $2, $2, %[stride] \r\n"
736  "sdc1 $f2, 0($2) \r\n"
737  "daddu $2, $2, %[stride] \r\n"
738  "sdc1 $f2, 0($2) \r\n"
739  ::[src]"r"(src),[stride]"r"(stride)
740  : "$2","$3","$4","$5","$6","$7","$8","$f0","$f2","$f4","$f6","$f30"
741  );
742 }
743 
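/* 8x16 vertical prediction: copy the 8 bytes above the block into each
 * of the 16 rows. */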
744 void ff_pred8x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
745 {
746  __asm__ volatile (
747  "gsldlc1 $f2, 7(%[srcA]) \r\n"
748  "gsldrc1 $f2, 0(%[srcA]) \r\n"
749  "dli $8, 16 \r\n"
750  "1: \r\n"
751  "gssdlc1 $f2, 7(%[src]) \r\n"
752  "gssdrc1 $f2, 0(%[src]) \r\n"
753  "daddu %[src], %[src], %[stride] \r\n"
754  "daddi $8, $8, -1 \r\n"
755  "bnez $8, 1b \r\n"
756  : [src]"+&r"(src)
757  : [stride]"r"(stride),[srcA]"r"(src-stride)
758  : "$8","$f2"
759  );
760 }
761 
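/* 8x16 horizontal prediction: fill each of the 16 rows with its left
 * neighbour src[-1], splatted across 8 bytes with ff_pb_1. */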
762 void ff_pred8x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
763 {
764  __asm__ volatile (
765  "daddiu $2, %[src], -1 \r\n"
766  "daddu $3, %[src], $0 \r\n"
767  "dli $6, 0x10 \r\n"
768  "1: \r\n"
769  "lbu $4, 0($2) \r\n"
770  "dmul $5, $4, %[ff_pb_1] \r\n"
771  "sdl $5, 7($3) \r\n"
772  "sdr $5, 0($3) \r\n"
773  "daddu $2, %[stride] \r\n"
774  "daddu $3, %[stride] \r\n"
775  "daddiu $6, -1 \r\n"
776  "bnez $6, 1b \r\n"
777  ::[src]"r"(src),[stride]"r"(stride),[ff_pb_1]"r"(ff_pb_1)
778  : "$2","$3","$4","$5","$6"
779  );
780 }