30 ptrdiff_t line_size,
int h)
37 PTR_ADDU "%[addr1], %[line_size], %[line_size] \n\t"
39 PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
40 "uld %[low32], 0x00(%[pixels]) \n\t"
41 "mtc1 %[low32], %[ftmp0] \n\t"
42 "uld %[low32], 0x00(%[addr0]) \n\t"
43 "mtc1 %[low32], %[ftmp1] \n\t"
44 "swc1 %[ftmp0], 0x00(%[block]) \n\t"
45 "gsswxc1 %[ftmp1], 0x00(%[block], %[line_size]) \n\t"
46 PTR_ADDU "%[pixels], %[pixels], %[addr1] \n\t"
47 PTR_ADDU "%[block], %[block], %[addr1] \n\t"
49 PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
50 "uld %[low32], 0x00(%[pixels]) \n\t"
51 "mtc1 %[low32], %[ftmp0] \n\t"
52 "uld %[low32], 0x00(%[addr0]) \n\t"
53 "mtc1 %[low32], %[ftmp1] \n\t"
54 "swc1 %[ftmp0], 0x00(%[block]) \n\t"
55 "gsswxc1 %[ftmp1], 0x00(%[block], %[line_size]) \n\t"
56 PTR_ADDU "%[pixels], %[pixels], %[addr1] \n\t"
57 PTR_ADDU "%[block], %[block], %[addr1] \n\t"
61 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
62 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
66 : [line_size]
"r"((
mips_reg)line_size)
72 ptrdiff_t line_size,
int h)
78 PTR_ADDU "%[addr1], %[line_size], %[line_size] \n\t"
80 "gsldlc1 %[ftmp0], 0x07(%[pixels]) \n\t"
81 PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
82 "gsldrc1 %[ftmp0], 0x00(%[pixels]) \n\t"
83 "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
84 "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
85 "sdc1 %[ftmp0], 0x00(%[block]) \n\t"
86 "gssdxc1 %[ftmp1], 0x00(%[block], %[line_size]) \n\t"
87 PTR_ADDU "%[pixels], %[pixels], %[addr1] \n\t"
88 PTR_ADDU "%[block], %[block], %[addr1] \n\t"
90 "gsldlc1 %[ftmp0], 0x07(%[pixels]) \n\t"
91 PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
92 "gsldrc1 %[ftmp0], 0x00(%[pixels]) \n\t"
93 "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
94 "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
95 "sdc1 %[ftmp0], 0x00(%[block]) \n\t"
96 "gssdxc1 %[ftmp1], 0x00(%[block], %[line_size]) \n\t"
97 PTR_ADDU "%[pixels], %[pixels], %[addr1] \n\t"
98 PTR_ADDU "%[block], %[block], %[addr1] \n\t"
102 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
103 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
106 : [line_size]
"r"((
mips_reg)line_size)
112 ptrdiff_t line_size,
int h)
118 PTR_ADDU "%[addr1], %[line_size], %[line_size] \n\t"
120 "gsldlc1 %[ftmp0], 0x07(%[pixels]) \n\t"
121 PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
122 "gsldrc1 %[ftmp0], 0x00(%[pixels]) \n\t"
123 "gsldlc1 %[ftmp2], 0x0f(%[pixels]) \n\t"
124 "gsldrc1 %[ftmp2], 0x08(%[pixels]) \n\t"
125 "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
126 "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
127 "gsldlc1 %[ftmp3], 0x0f(%[addr0]) \n\t"
128 "gsldrc1 %[ftmp3], 0x08(%[addr0]) \n\t"
129 "sdc1 %[ftmp0], 0x00(%[block]) \n\t"
130 "gssdxc1 %[ftmp1], 0x00(%[block], %[line_size]) \n\t"
131 "sdc1 %[ftmp2], 0x08(%[block]) \n\t"
132 "gssdxc1 %[ftmp3], 0x08(%[block], %[line_size]) \n\t"
133 PTR_ADDU "%[pixels], %[pixels], %[addr1] \n\t"
134 PTR_ADDU "%[block], %[block], %[addr1] \n\t"
136 "gsldlc1 %[ftmp0], 0x07(%[pixels]) \n\t"
137 PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
138 "gsldrc1 %[ftmp0], 0x00(%[pixels]) \n\t"
139 "gsldlc1 %[ftmp2], 0x0f(%[pixels]) \n\t"
140 "gsldrc1 %[ftmp2], 0x08(%[pixels]) \n\t"
141 "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
142 "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
143 "gsldlc1 %[ftmp3], 0x0f(%[addr0]) \n\t"
144 "gsldrc1 %[ftmp3], 0x08(%[addr0]) \n\t"
145 "sdc1 %[ftmp0], 0x00(%[block]) \n\t"
146 "gssdxc1 %[ftmp1], 0x00(%[block], %[line_size]) \n\t"
147 "sdc1 %[ftmp2], 0x08(%[block]) \n\t"
148 "gssdxc1 %[ftmp3], 0x08(%[block], %[line_size]) \n\t"
149 PTR_ADDU "%[pixels], %[pixels], %[addr1] \n\t"
150 PTR_ADDU "%[block], %[block], %[addr1] \n\t"
154 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
155 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
156 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
159 : [line_size]
"r"((
mips_reg)line_size)
165 ptrdiff_t line_size,
int h)
172 PTR_ADDU "%[addr2], %[line_size], %[line_size] \n\t"
174 PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
175 "uld %[low32], 0x00(%[pixels]) \n\t"
176 "mtc1 %[low32], %[ftmp0] \n\t"
177 "uld %[low32], 0x00(%[addr0]) \n\t"
178 "mtc1 %[low32], %[ftmp1] \n\t"
179 PTR_ADDU "%[addr1], %[block], %[line_size] \n\t"
180 "uld %[low32], 0x00(%[block]) \n\t"
181 "mtc1 %[low32], %[ftmp2] \n\t"
182 "uld %[low32], 0x00(%[addr1]) \n\t"
183 "mtc1 %[low32], %[ftmp3] \n\t"
184 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
185 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
186 "swc1 %[ftmp0], 0x00(%[block]) \n\t"
187 "gsswxc1 %[ftmp1], 0x00(%[block], %[line_size]) \n\t"
188 PTR_ADDU "%[pixels], %[pixels], %[addr2] \n\t"
189 PTR_ADDU "%[block], %[block], %[addr2] \n\t"
191 PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
192 "uld %[low32], 0x00(%[pixels]) \n\t"
193 "mtc1 %[low32], %[ftmp0] \n\t"
194 "uld %[low32], 0x00(%[addr0]) \n\t"
195 "mtc1 %[low32], %[ftmp1] \n\t"
196 PTR_ADDU "%[addr1], %[block], %[line_size] \n\t"
197 "uld %[low32], 0x00(%[block]) \n\t"
198 "mtc1 %[low32], %[ftmp2] \n\t"
199 "uld %[low32], 0x00(%[addr1]) \n\t"
200 "mtc1 %[low32], %[ftmp3] \n\t"
201 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
202 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
203 "swc1 %[ftmp0], 0x00(%[block]) \n\t"
204 "gsswxc1 %[ftmp1], 0x00(%[block], %[line_size]) \n\t"
205 PTR_ADDU "%[pixels], %[pixels], %[addr2] \n\t"
206 PTR_ADDU "%[block], %[block], %[addr2] \n\t"
210 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
211 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
212 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
213 [addr2]
"=&r"(addr[2]),
217 : [line_size]
"r"((
mips_reg)line_size)
223 ptrdiff_t line_size,
int h)
229 PTR_ADDU "%[addr2], %[line_size], %[line_size] \n\t"
231 "gsldlc1 %[ftmp0], 0x07(%[pixels]) \n\t"
232 PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
233 "gsldrc1 %[ftmp0], 0x00(%[pixels]) \n\t"
234 "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
235 PTR_ADDU "%[addr1], %[block], %[line_size] \n\t"
236 "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
237 "gsldlc1 %[ftmp2], 0x07(%[block]) \n\t"
238 "gsldrc1 %[ftmp2], 0x00(%[block]) \n\t"
239 "gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
240 "gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
241 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
242 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
243 "sdc1 %[ftmp0], 0x00(%[block]) \n\t"
244 "gssdxc1 %[ftmp1], 0x00(%[block], %[line_size]) \n\t"
245 PTR_ADDU "%[pixels], %[pixels], %[addr2] \n\t"
246 PTR_ADDU "%[block], %[block], %[addr2] \n\t"
248 "gsldlc1 %[ftmp0], 0x07(%[pixels]) \n\t"
249 PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
250 "gsldrc1 %[ftmp0], 0x00(%[pixels]) \n\t"
251 "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
252 PTR_ADDU "%[addr1], %[block], %[line_size] \n\t"
253 "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
254 "gsldlc1 %[ftmp2], 0x07(%[block]) \n\t"
255 "gsldrc1 %[ftmp2], 0x00(%[block]) \n\t"
256 "gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
257 "gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
258 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
259 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
260 "sdc1 %[ftmp0], 0x00(%[block]) \n\t"
261 "gssdxc1 %[ftmp1], 0x00(%[block], %[line_size]) \n\t"
262 PTR_ADDU "%[pixels], %[pixels], %[addr2] \n\t"
263 PTR_ADDU "%[block], %[block], %[addr2] \n\t"
267 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
268 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
269 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
270 [addr2]
"=&r"(addr[2]),
273 : [line_size]
"r"((
mips_reg)line_size)
279 ptrdiff_t line_size,
int h)
285 PTR_ADDU "%[addr2], %[line_size], %[line_size] \n\t"
287 "gsldlc1 %[ftmp0], 0x07(%[pixels]) \n\t"
288 PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
289 "gsldrc1 %[ftmp0], 0x00(%[pixels]) \n\t"
290 "gsldlc1 %[ftmp4], 0x0f(%[pixels]) \n\t"
291 PTR_ADDU "%[addr1], %[block], %[line_size] \n\t"
292 "gsldrc1 %[ftmp4], 0x08(%[pixels]) \n\t"
293 "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
294 "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
295 "gsldlc1 %[ftmp5], 0x0f(%[addr0]) \n\t"
296 "gsldrc1 %[ftmp5], 0x08(%[addr0]) \n\t"
297 "gsldlc1 %[ftmp2], 0x07(%[block]) \n\t"
298 "gsldrc1 %[ftmp2], 0x00(%[block]) \n\t"
299 "gsldlc1 %[ftmp6], 0x0f(%[block]) \n\t"
300 "gsldrc1 %[ftmp6], 0x08(%[block]) \n\t"
301 "gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
302 "gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
303 "gsldlc1 %[ftmp7], 0x0f(%[addr1]) \n\t"
304 "gsldrc1 %[ftmp7], 0x08(%[addr1]) \n\t"
305 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
306 "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
307 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
308 "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
309 "sdc1 %[ftmp0], 0x00(%[block]) \n\t"
310 "gssdxc1 %[ftmp1], 0x00(%[block], %[line_size]) \n\t"
311 "sdc1 %[ftmp4], 0x08(%[block]) \n\t"
312 "gssdxc1 %[ftmp5], 0x08(%[block], %[line_size]) \n\t"
313 PTR_ADDU "%[pixels], %[pixels], %[addr2] \n\t"
314 PTR_ADDU "%[block], %[block], %[addr2] \n\t"
316 "gsldlc1 %[ftmp0], 0x07(%[pixels]) \n\t"
317 PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
318 "gsldrc1 %[ftmp0], 0x00(%[pixels]) \n\t"
319 "gsldlc1 %[ftmp4], 0x0f(%[pixels]) \n\t"
320 PTR_ADDU "%[addr1], %[block], %[line_size] \n\t"
321 "gsldrc1 %[ftmp4], 0x08(%[pixels]) \n\t"
322 "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
323 "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
324 "gsldlc1 %[ftmp5], 0x0f(%[addr0]) \n\t"
325 "gsldrc1 %[ftmp5], 0x08(%[addr0]) \n\t"
326 "gsldlc1 %[ftmp2], 0x07(%[block]) \n\t"
327 "gsldrc1 %[ftmp2], 0x00(%[block]) \n\t"
328 "gsldlc1 %[ftmp6], 0x0f(%[block]) \n\t"
329 "gsldrc1 %[ftmp6], 0x08(%[block]) \n\t"
330 "gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
331 "gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
332 "gsldlc1 %[ftmp7], 0x0f(%[addr1]) \n\t"
333 "gsldrc1 %[ftmp7], 0x08(%[addr1]) \n\t"
334 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
335 "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
336 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
337 "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
338 "sdc1 %[ftmp0], 0x00(%[block]) \n\t"
339 "gssdxc1 %[ftmp1], 0x00(%[block], %[line_size]) \n\t"
340 "sdc1 %[ftmp4], 0x08(%[block]) \n\t"
341 "gssdxc1 %[ftmp5], 0x08(%[block], %[line_size]) \n\t"
342 PTR_ADDU "%[pixels], %[pixels], %[addr2] \n\t"
343 PTR_ADDU "%[block], %[block], %[addr2] \n\t"
347 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
348 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
349 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
350 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
351 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
352 [addr2]
"=&r"(addr[2]),
355 : [line_size]
"r"((
mips_reg)line_size)
361 const uint8_t *src2,
int dst_stride,
int src_stride1,
int src_stride2,
369 PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
370 PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
371 PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
373 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
374 "uld %[low32], 0x00(%[src1]) \n\t"
375 "mtc1 %[low32], %[ftmp0] \n\t"
376 "uld %[low32], 0x00(%[addr0]) \n\t"
377 "mtc1 %[low32], %[ftmp1] \n\t"
378 "uld %[low32], 0x00(%[src2]) \n\t"
379 "mtc1 %[low32], %[ftmp2] \n\t"
380 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
381 "uld %[low32], 0x00(%[addr1]) \n\t"
382 "mtc1 %[low32], %[ftmp3] \n\t"
383 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
384 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
385 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
386 "swc1 %[ftmp0], 0x00(%[dst]) \n\t"
387 "gsswxc1 %[ftmp1], 0x00(%[dst], %[dst_stride]) \n\t"
388 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
389 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
391 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
392 "uld %[low32], 0x00(%[src1]) \n\t"
393 "mtc1 %[low32], %[ftmp0] \n\t"
394 "uld %[low32], 0x00(%[addr0]) \n\t"
395 "mtc1 %[low32], %[ftmp1] \n\t"
396 "uld %[low32], 0x00(%[src2]) \n\t"
397 "mtc1 %[low32], %[ftmp2] \n\t"
398 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
399 "uld %[low32], 0x00(%[addr1]) \n\t"
400 "mtc1 %[low32], %[ftmp3] \n\t"
401 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
402 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
403 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
404 "swc1 %[ftmp0], 0x00(%[dst]) \n\t"
405 "gsswxc1 %[ftmp1], 0x00(%[dst], %[dst_stride]) \n\t"
406 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
407 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
411 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
412 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
413 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
414 [addr2]
"=&r"(addr[2]), [addr3]
"=&r"(addr[3]),
415 [addr4]
"=&r"(addr[4]),
417 [dst]
"+&r"(dst), [src1]
"+&r"(src1),
418 [src2]
"+&r"(src2), [h]
"+&r"(h)
419 : [dst_stride]
"r"((
mips_reg)dst_stride),
420 [src_stride1]
"r"((
mips_reg)src_stride1),
421 [src_stride2]
"r"((
mips_reg)src_stride2)
427 const uint8_t *src2,
int dst_stride,
int src_stride1,
int src_stride2,
434 PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
435 PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
436 PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
438 "gsldlc1 %[ftmp0], 0x07(%[src1]) \n\t"
439 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
440 "gsldrc1 %[ftmp0], 0x00(%[src1]) \n\t"
441 "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
442 "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
443 "gsldlc1 %[ftmp2], 0x07(%[src2]) \n\t"
444 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
445 "gsldrc1 %[ftmp2], 0x00(%[src2]) \n\t"
446 "gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
447 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
448 "gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
449 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
450 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
451 "sdc1 %[ftmp0], 0x00(%[dst]) \n\t"
452 "gssdxc1 %[ftmp1], 0x00(%[dst], %[dst_stride]) \n\t"
453 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
454 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
456 "gsldlc1 %[ftmp0], 0x07(%[src1]) \n\t"
457 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
458 "gsldrc1 %[ftmp0], 0x00(%[src1]) \n\t"
459 "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
460 "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
461 "gsldlc1 %[ftmp2], 0x07(%[src2]) \n\t"
462 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
463 "gsldrc1 %[ftmp2], 0x00(%[src2]) \n\t"
464 "gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
465 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
466 "gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
467 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
468 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
469 "sdc1 %[ftmp0], 0x00(%[dst]) \n\t"
470 "gssdxc1 %[ftmp1], 0x00(%[dst], %[dst_stride]) \n\t"
471 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
472 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
476 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
477 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
478 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
479 [addr2]
"=&r"(addr[2]), [addr3]
"=&r"(addr[3]),
480 [addr4]
"=&r"(addr[4]),
482 [src2]
"+&r"(src2), [
h]
"+&r"(
h)
483 : [dst_stride]
"r"((
mips_reg)dst_stride),
484 [src_stride1]
"r"((
mips_reg)src_stride1),
485 [src_stride2]
"r"((
mips_reg)src_stride2)
491 const uint8_t *src2,
int dst_stride,
int src_stride1,
int src_stride2,
498 PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
499 PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
500 PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
502 "gsldlc1 %[ftmp0], 0x07(%[src1]) \n\t"
503 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
504 "gsldrc1 %[ftmp0], 0x00(%[src1]) \n\t"
505 "gsldlc1 %[ftmp4], 0x0f(%[src1]) \n\t"
506 "gsldrc1 %[ftmp4], 0x08(%[src1]) \n\t"
507 "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
508 "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
509 "gsldlc1 %[ftmp5], 0x0f(%[addr0]) \n\t"
510 "gsldrc1 %[ftmp5], 0x08(%[addr0]) \n\t"
511 "gsldlc1 %[ftmp2], 0x07(%[src2]) \n\t"
512 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
513 "gsldrc1 %[ftmp2], 0x00(%[src2]) \n\t"
514 "gsldlc1 %[ftmp6], 0x0f(%[src2]) \n\t"
515 "gsldrc1 %[ftmp6], 0x08(%[src2]) \n\t"
516 "gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
517 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
518 "gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
519 "gsldlc1 %[ftmp7], 0x0f(%[addr1]) \n\t"
520 "gsldrc1 %[ftmp7], 0x08(%[addr1]) \n\t"
521 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
522 "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
523 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
524 "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
525 "sdc1 %[ftmp0], 0x00(%[dst]) \n\t"
526 "gssdxc1 %[ftmp1], 0x00(%[dst], %[dst_stride]) \n\t"
527 "sdc1 %[ftmp4], 0x08(%[dst]) \n\t"
528 "gssdxc1 %[ftmp5], 0x08(%[dst], %[dst_stride]) \n\t"
529 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
530 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
532 "gsldlc1 %[ftmp0], 0x07(%[src1]) \n\t"
533 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
534 "gsldrc1 %[ftmp0], 0x00(%[src1]) \n\t"
535 "gsldlc1 %[ftmp4], 0x0f(%[src1]) \n\t"
536 "gsldrc1 %[ftmp4], 0x08(%[src1]) \n\t"
537 "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
538 "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
539 "gsldlc1 %[ftmp5], 0x0f(%[addr0]) \n\t"
540 "gsldrc1 %[ftmp5], 0x08(%[addr0]) \n\t"
541 "gsldlc1 %[ftmp2], 0x07(%[src2]) \n\t"
542 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
543 "gsldrc1 %[ftmp2], 0x00(%[src2]) \n\t"
544 "gsldlc1 %[ftmp6], 0x0f(%[src2]) \n\t"
545 "gsldrc1 %[ftmp6], 0x08(%[src2]) \n\t"
546 "gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
547 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
548 "gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
549 "gsldlc1 %[ftmp7], 0x0f(%[addr1]) \n\t"
550 "gsldrc1 %[ftmp7], 0x08(%[addr1]) \n\t"
551 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
552 "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
553 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
554 "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
555 "sdc1 %[ftmp0], 0x00(%[dst]) \n\t"
556 "gssdxc1 %[ftmp1], 0x00(%[dst], %[dst_stride]) \n\t"
557 "sdc1 %[ftmp4], 0x08(%[dst]) \n\t"
558 "gssdxc1 %[ftmp5], 0x08(%[dst], %[dst_stride]) \n\t"
559 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
560 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
564 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
565 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
566 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
567 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
568 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
569 [addr2]
"=&r"(addr[2]), [addr3]
"=&r"(addr[3]),
570 [addr4]
"=&r"(addr[4]),
572 [src2]
"+&r"(src2), [
h]
"+&r"(
h)
573 : [dst_stride]
"r"((
mips_reg)dst_stride),
574 [src_stride1]
"r"((
mips_reg)src_stride1),
575 [src_stride2]
"r"((
mips_reg)src_stride2)
581 const uint8_t *src2,
int dst_stride,
int src_stride1,
int src_stride2,
589 PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
590 PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
591 PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
593 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
594 "uld %[low32], 0x00(%[src1]) \n\t"
595 "mtc1 %[low32], %[ftmp0] \n\t"
596 "uld %[low32], 0x00(%[addr0]) \n\t"
597 "mtc1 %[low32], %[ftmp1] \n\t"
598 "uld %[low32], 0x00(%[src2]) \n\t"
599 "mtc1 %[low32], %[ftmp2] \n\t"
600 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
601 "uld %[low32], 0x00(%[addr1]) \n\t"
602 "mtc1 %[low32], %[ftmp3] \n\t"
603 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
604 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
605 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
606 PTR_ADDU "%[addr5], %[dst], %[dst_stride] \n\t"
607 "uld %[low32], 0x00(%[dst]) \n\t"
608 "mtc1 %[low32], %[ftmp4] \n\t"
609 "uld %[low32], 0x00(%[addr5]) \n\t"
610 "mtc1 %[low32], %[ftmp5] \n\t"
611 "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
612 "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
613 "swc1 %[ftmp0], 0x00(%[dst]) \n\t"
614 "gsswxc1 %[ftmp1], 0x00(%[dst], %[dst_stride]) \n\t"
615 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
616 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
618 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
619 "uld %[low32], 0x00(%[src1]) \n\t"
620 "mtc1 %[low32], %[ftmp0] \n\t"
621 "uld %[low32], 0x00(%[addr0]) \n\t"
622 "mtc1 %[low32], %[ftmp1] \n\t"
623 "uld %[low32], 0x00(%[src2]) \n\t"
624 "mtc1 %[low32], %[ftmp2] \n\t"
625 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
626 "uld %[low32], 0x00(%[addr1]) \n\t"
627 "mtc1 %[low32], %[ftmp3] \n\t"
628 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
629 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
630 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
631 PTR_ADDU "%[addr5], %[dst], %[dst_stride] \n\t"
632 "uld %[low32], 0x00(%[dst]) \n\t"
633 "mtc1 %[low32], %[ftmp4] \n\t"
634 "uld %[low32], 0x00(%[addr5]) \n\t"
635 "mtc1 %[low32], %[ftmp5] \n\t"
636 "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
637 "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
638 "swc1 %[ftmp0], 0x00(%[dst]) \n\t"
639 "gsswxc1 %[ftmp1], 0x00(%[dst], %[dst_stride]) \n\t"
640 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
641 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
645 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
646 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
647 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
648 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
649 [addr2]
"=&r"(addr[2]), [addr3]
"=&r"(addr[3]),
650 [addr4]
"=&r"(addr[4]), [addr5]
"=&r"(addr[5]),
653 [src2]
"+&r"(src2), [
h]
"+&r"(
h)
654 : [dst_stride]
"r"((
mips_reg)dst_stride),
655 [src_stride1]
"r"((
mips_reg)src_stride1),
656 [src_stride2]
"r"((
mips_reg)src_stride2)
662 const uint8_t *src2,
int dst_stride,
int src_stride1,
int src_stride2,
669 PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
670 PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
671 PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
673 "gsldlc1 %[ftmp0], 0x07(%[src1]) \n\t"
674 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
675 "gsldrc1 %[ftmp0], 0x00(%[src1]) \n\t"
676 "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
677 "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
678 "gsldlc1 %[ftmp2], 0x07(%[src2]) \n\t"
679 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
680 "gsldrc1 %[ftmp2], 0x00(%[src2]) \n\t"
681 "gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
682 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
683 "gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
684 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
685 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
686 PTR_ADDU "%[addr5], %[dst], %[dst_stride] \n\t"
687 "gsldlc1 %[ftmp4], 0x07(%[dst]) \n\t"
688 "gsldrc1 %[ftmp4], 0x00(%[dst]) \n\t"
689 "gsldlc1 %[ftmp5], 0x07(%[addr5]) \n\t"
690 "gsldrc1 %[ftmp5], 0x00(%[addr5]) \n\t"
691 "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
692 "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
693 "sdc1 %[ftmp0], 0x00(%[dst]) \n\t"
694 "gssdxc1 %[ftmp1], 0x00(%[dst], %[dst_stride]) \n\t"
695 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
696 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
698 "gsldlc1 %[ftmp0], 0x07(%[src1]) \n\t"
699 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
700 "gsldrc1 %[ftmp0], 0x00(%[src1]) \n\t"
701 "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
702 "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
703 "gsldlc1 %[ftmp2], 0x07(%[src2]) \n\t"
704 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
705 "gsldrc1 %[ftmp2], 0x00(%[src2]) \n\t"
706 "gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
707 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
708 "gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
709 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
710 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
711 PTR_ADDU "%[addr5], %[dst], %[dst_stride] \n\t"
712 "gsldlc1 %[ftmp4], 0x07(%[dst]) \n\t"
713 "gsldrc1 %[ftmp4], 0x00(%[dst]) \n\t"
714 "gsldlc1 %[ftmp5], 0x07(%[addr5]) \n\t"
715 "gsldrc1 %[ftmp5], 0x00(%[addr5]) \n\t"
716 "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
717 "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
718 "sdc1 %[ftmp0], 0x00(%[dst]) \n\t"
719 "gssdxc1 %[ftmp1], 0x00(%[dst], %[dst_stride]) \n\t"
720 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
721 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
725 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
726 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
727 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
728 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
729 [addr2]
"=&r"(addr[2]), [addr3]
"=&r"(addr[3]),
730 [addr4]
"=&r"(addr[4]), [addr5]
"=&r"(addr[5]),
731 [dst]
"+&r"(dst), [src1]
"+&r"(src1),
732 [src2]
"+&r"(src2), [h]
"+&r"(h)
733 : [dst_stride]
"r"((
mips_reg)dst_stride),
734 [src_stride1]
"r"((
mips_reg)src_stride1),
735 [src_stride2]
"r"((
mips_reg)src_stride2)
741 const uint8_t *src2,
int dst_stride,
int src_stride1,
int src_stride2,
747 src_stride1, src_stride2, h);
751 ptrdiff_t line_size,
int h)
758 ptrdiff_t line_size,
int h)
765 ptrdiff_t line_size,
int h)
772 ptrdiff_t line_size,
int h)
779 ptrdiff_t line_size,
int h)
786 ptrdiff_t line_size,
int h)
793 const uint8_t *src2,
int dst_stride,
int src_stride1,
int src_stride2,
800 "pcmpeqb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
801 PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
802 PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
803 PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
805 "gsldlc1 %[ftmp0], 0x07(%[src1]) \n\t"
806 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
807 "gsldrc1 %[ftmp0], 0x00(%[src1]) \n\t"
808 "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
809 "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
810 "gsldlc1 %[ftmp2], 0x07(%[src2]) \n\t"
811 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
812 "gsldrc1 %[ftmp2], 0x00(%[src2]) \n\t"
813 "gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
814 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
815 "gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
816 "xor %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
817 "xor %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
818 "xor %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
819 "xor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
820 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
821 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
822 "xor %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
823 "xor %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
824 "sdc1 %[ftmp0], 0x00(%[dst]) \n\t"
825 "gssdxc1 %[ftmp1], 0x00(%[dst], %[dst_stride]) \n\t"
826 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
827 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
829 "gsldlc1 %[ftmp0], 0x07(%[src1]) \n\t"
830 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
831 "gsldrc1 %[ftmp0], 0x00(%[src1]) \n\t"
832 "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
833 "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
834 "gsldlc1 %[ftmp2], 0x07(%[src2]) \n\t"
835 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
836 "gsldrc1 %[ftmp2], 0x00(%[src2]) \n\t"
837 "gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
838 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
839 "gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
840 "xor %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
841 "xor %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
842 "xor %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
843 "xor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
844 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
845 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
846 "xor %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
847 "xor %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
848 "sdc1 %[ftmp0], 0x00(%[dst]) \n\t"
849 "gssdxc1 %[ftmp1], 0x00(%[dst], %[dst_stride]) \n\t"
850 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
851 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
855 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
856 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
857 [ftmp4]
"=&f"(ftmp[4]),
858 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
859 [addr2]
"=&r"(addr[2]), [addr3]
"=&r"(addr[3]),
860 [addr4]
"=&r"(addr[4]),
861 [dst]
"+&r"(dst), [src1]
"+&r"(src1),
862 [src2]
"+&r"(src2), [h]
"+&r"(h)
863 : [dst_stride]
"r"((
mips_reg)dst_stride),
864 [src_stride1]
"r"((
mips_reg)src_stride1),
865 [src_stride2]
"r"((
mips_reg)src_stride2)
871 ptrdiff_t line_size,
int h)
874 line_size, line_size, h);
878 ptrdiff_t line_size,
int h)
885 ptrdiff_t line_size,
int h)
888 line_size, line_size, h);
892 ptrdiff_t line_size,
int h)
895 line_size, line_size, h);
899 ptrdiff_t line_size,
int h)
902 line_size, line_size, h);
906 ptrdiff_t line_size,
int h)
909 line_size, line_size, h);
913 ptrdiff_t line_size,
int h)
916 line_size, line_size, h);
920 ptrdiff_t line_size,
int h)
927 ptrdiff_t line_size,
int h)
930 line_size, line_size, line_size, h);
934 ptrdiff_t line_size,
int h)
941 ptrdiff_t line_size,
int h)
946 const uint32_t
b =
AV_RN32(pixels + 1);
947 uint32_t l0 = (a & 0x03030303UL) +
950 uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) +
951 ((b & 0xFCFCFCFCUL) >> 2);
955 for (i = 0; i <
h; i += 2) {
957 uint32_t b =
AV_RN32(pixels + 1);
958 l1 = (a & 0x03030303UL) +
960 h1 = ((a & 0xFCFCFCFCUL) >> 2) +
961 ((b & 0xFCFCFCFCUL) >> 2);
962 *((uint32_t *) block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
967 l0 = (a & 0x03030303UL) +
970 h0 = ((a & 0xFCFCFCFCUL) >> 2) +
971 ((b & 0xFCFCFCFCUL) >> 2);
972 *((uint32_t *) block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
979 ptrdiff_t line_size,
int h)
986 "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
987 "dli %[addr0], 0x0f \n\t"
988 "pcmpeqw %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
989 "dmtc1 %[addr0], %[ftmp8] \n\t"
990 "dli %[addr0], 0x01 \n\t"
991 "psrlh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
992 "dmtc1 %[addr0], %[ftmp8] \n\t"
993 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
995 "dli %[addr0], 0x02 \n\t"
996 "gsldlc1 %[ftmp0], 0x07(%[pixels]) \n\t"
997 "gsldrc1 %[ftmp0], 0x00(%[pixels]) \n\t"
998 "dmtc1 %[addr0], %[ftmp9] \n\t"
999 "gsldlc1 %[ftmp4], 0x08(%[pixels]) \n\t"
1000 "gsldrc1 %[ftmp4], 0x01(%[pixels]) \n\t"
1001 "mov.d %[ftmp1], %[ftmp0] \n\t"
1002 "mov.d %[ftmp5], %[ftmp4] \n\t"
1003 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1004 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1005 "punpckhbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1006 "punpckhbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1007 "paddush %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1008 "paddush %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
1009 "xor %[addr0], %[addr0], %[addr0] \n\t"
1010 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
1013 PTR_ADDU "%[addr1], %[pixels], %[addr0] \n\t"
1014 "gsldlc1 %[ftmp0], 0x07(%[addr1]) \n\t"
1015 "gsldrc1 %[ftmp0], 0x00(%[addr1]) \n\t"
1016 "gsldlc1 %[ftmp2], 0x08(%[addr1]) \n\t"
1017 "gsldrc1 %[ftmp2], 0x01(%[addr1]) \n\t"
1018 "mov.d %[ftmp1], %[ftmp0] \n\t"
1019 "mov.d %[ftmp3], %[ftmp2] \n\t"
1020 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1021 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1022 "punpckhbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1023 "punpckhbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1024 "paddush %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
1025 "paddush %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1026 "paddush %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1027 "paddush %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1028 "paddush %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1029 "paddush %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
1030 "psrlh %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
1031 "psrlh %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
1032 "packushb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1033 "gssdxc1 %[ftmp4], 0x00(%[block], %[addr0]) \n\t"
1034 PTR_ADDU "%[addr0], %[addr0], %[line_size] \n\t"
1035 PTR_ADDU "%[addr1], %[pixels], %[addr0] \n\t"
1036 "gsldlc1 %[ftmp2], 0x07(%[addr1]) \n\t"
1037 "gsldrc1 %[ftmp2], 0x00(%[addr1]) \n\t"
1038 "gsldlc1 %[ftmp4], 0x08(%[addr1]) \n\t"
1039 "gsldrc1 %[ftmp4], 0x01(%[addr1]) \n\t"
1040 "mov.d %[ftmp3], %[ftmp2] \n\t"
1041 "mov.d %[ftmp5], %[ftmp4] \n\t"
1042 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1043 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1044 "punpckhbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1045 "punpckhbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1046 "paddush %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
1047 "paddush %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
1048 "paddush %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
1049 "paddush %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1050 "paddush %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
1051 "paddush %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1052 "psrlh %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
1053 "psrlh %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
1054 "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1055 "gssdxc1 %[ftmp0], 0x00(%[block], %[addr0]) \n\t"
1056 PTR_ADDU "%[addr0], %[addr0], %[line_size] \n\t"
1058 "bnez %[h], 1b \n\t"
1059 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1060 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1061 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1062 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1063 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
1064 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
1065 [
h]
"+&r"(
h), [pixels]
"+&r"(pixels)
1073 for (j = 0; j < 2; j++) {
1075 const uint32_t
a =
AV_RN32(pixels);
1076 const uint32_t
b =
AV_RN32(pixels + 1);
1077 uint32_t l0 = (a & 0x03030303UL) +
1078 (b & 0x03030303UL) +
1080 uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) +
1081 ((b & 0xFCFCFCFCUL) >> 2);
1084 pixels += line_size;
1085 for (i = 0; i <
h; i += 2) {
1087 uint32_t b =
AV_RN32(pixels + 1);
1088 l1 = (a & 0x03030303UL) +
1090 h1 = ((a & 0xFCFCFCFCUL) >> 2) +
1091 ((b & 0xFCFCFCFCUL) >> 2);
1092 *((uint32_t *) block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
1093 pixels += line_size;
1097 l0 = (a & 0x03030303UL) +
1098 (b & 0x03030303UL) +
1100 h0 = ((a & 0xFCFCFCFCUL) >> 2) +
1101 ((b & 0xFCFCFCFCUL) >> 2);
1102 *((uint32_t *) block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
1103 pixels += line_size;
1106 pixels += 4 - line_size * (h + 1);
1107 block += 4 - line_size *
h;
1113 ptrdiff_t line_size,
int h)
1120 ptrdiff_t line_size,
int h)
/*
 * NOTE(review): this view is a partial excerpt. The first line of the
 * signature (with the function name), the braces, and several operand
 * lines are not visible here; comments describe only what is shown.
 *
 * Visible behaviour: one 4-byte-wide column is processed, two rows per loop
 * iteration. Each stored word is combined with the word already in block
 * through rnd_avg32().
 */
/* a / b: 32-bit words read at pixels and at pixels + 1 (one byte apart). */
1124 const uint32_t
a =
AV_RN32(pixels);
1125 const uint32_t
b =
AV_RN32(pixels + 1);
/*
 * Each byte is split into its low 2 bits (mask 0x03030303) and its high
 * 6 bits (mask 0xFCFCFCFC, shifted down by 2) -- presumably so that four
 * samples can be summed per byte lane without carrying into the next lane.
 * The last term of the l0 sum is on a line not visible in this view.
 */
1126 uint32_t l0 = (a & 0x03030303UL) +
1127 (b & 0x03030303UL) +
1129 uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) +
1130 ((b & 0xFCFCFCFCUL) >> 2);
/* Step to the next source row before entering the loop. */
1133 pixels += line_size;
/* i advances by 2: every iteration handles two rows. */
1134 for (i = 0; i <
h; i += 2) {
/* This b shadows the outer b; the matching reload of a is not visible here. */
1136 uint32_t b =
AV_RN32(pixels + 1);
/* Low/high partial sums for the current row (second l1 operand not visible). */
1137 l1 = (a & 0x03030303UL) +
1139 h1 = ((a & 0xFCFCFCFCUL) >> 2) +
1140 ((b & 0xFCFCFCFCUL) >> 2);
/*
 * Result word = high sums + ((low sums >> 2) masked to 4 bits per byte),
 * then merged with the existing destination word via rnd_avg32()
 * (presumably a per-byte rounded average -- its body is not visible here).
 */
1141 *((uint32_t *) block) =
rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
1142 pixels += line_size;
/*
 * Second row of the pair: l0/h0 are recomputed from the next source row and
 * combined with the l1/h1 values computed above.
 */
1146 l0 = (a & 0x03030303UL) +
1147 (b & 0x03030303UL) +
1149 h0 = ((a & 0xFCFCFCFCUL) >> 2) +
1150 ((b & 0xFCFCFCFCUL) >> 2);
1151 *((uint32_t *) block) =
rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
1152 pixels += line_size;
1158 ptrdiff_t line_size,
int h)
/*
 * NOTE(review): this view is a partial excerpt. The first line of the
 * signature (with the function name), the braces, and several operand
 * lines are not visible here; comments describe only what is shown.
 *
 * Visible behaviour: two 4-byte-wide columns are processed in turn (j loop),
 * two rows per inner iteration. Each stored word is combined with the word
 * already in block through rnd_avg32().
 */
/* One pass per 4-byte column. */
1163 for (j = 0; j < 2; j++) {
/* a / b: 32-bit words read at pixels and at pixels + 1 (one byte apart). */
1165 const uint32_t
a =
AV_RN32(pixels);
1166 const uint32_t
b =
AV_RN32(pixels + 1);
/*
 * Bytes are split into low 2 bits (0x03030303) and high 6 bits
 * (0xFCFCFCFC >> 2) -- presumably to sum four samples per byte lane without
 * inter-lane carries. The last term of the l0 sum is not visible in this view.
 */
1167 uint32_t l0 = (a & 0x03030303UL) +
1168 (b & 0x03030303UL) +
1170 uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) +
1171 ((b & 0xFCFCFCFCUL) >> 2);
/* Step to the next source row before entering the row loop. */
1174 pixels += line_size;
/* i advances by 2: every iteration handles two rows. */
1175 for (i = 0; i <
h; i += 2) {
/* This b shadows the outer b; the matching reload of a is not visible here. */
1177 uint32_t b =
AV_RN32(pixels + 1);
/* Low/high partial sums for the current row (second l1 operand not visible). */
1178 l1 = (a & 0x03030303UL) +
1180 h1 = ((a & 0xFCFCFCFCUL) >> 2) +
1181 ((b & 0xFCFCFCFCUL) >> 2);
/*
 * Result word = high sums + ((low sums >> 2) masked to 4 bits per byte),
 * merged with the existing destination word via rnd_avg32()
 * (presumably a per-byte rounded average -- its body is not visible here).
 */
1182 *((uint32_t *) block) =
rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
1183 pixels += line_size;
/* Second row of the pair: refresh l0/h0 and reuse l1/h1 from above. */
1187 l0 = (a & 0x03030303UL) +
1188 (b & 0x03030303UL) +
1190 h0 = ((a & 0xFCFCFCFCUL) >> 2) +
1191 ((b & 0xFCFCFCFCUL) >> 2);
1192 *((uint32_t *) block) =
rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
1193 pixels += line_size;
/*
 * Rewind for the next column: pixels moved down h + 1 rows in total (one
 * before the loop plus one per processed row, for even h), so undo that and
 * step 4 bytes to the right.
 */
1196 pixels += 4 - line_size * (h + 1);
/*
 * Same for block, which is rewound by h rows -- this implies block is advanced
 * by line_size after each store on lines not visible in this view.
 */
1197 block += 4 - line_size *
h;
1202 ptrdiff_t line_size,
int h)
1209 ptrdiff_t line_size,
int h)
/*
 * NOTE(review): this view is a partial excerpt. The first line of the
 * signature (with the function name), the braces, and several operand
 * lines are not visible here; comments describe only what is shown.
 *
 * Visible behaviour: two 4-byte-wide columns are processed in turn (j loop),
 * two rows per inner iteration. The computed word is written straight to
 * block; the previous destination contents are not read.
 */
/* One pass per 4-byte column. */
1214 for (j = 0; j < 2; j++) {
/* a / b: 32-bit words read at pixels and at pixels + 1 (one byte apart). */
1216 const uint32_t
a =
AV_RN32(pixels);
1217 const uint32_t
b =
AV_RN32(pixels + 1);
/*
 * Bytes are split into low 2 bits (0x03030303) and high 6 bits
 * (0xFCFCFCFC >> 2) -- presumably to sum four samples per byte lane without
 * inter-lane carries. The last term of the l0 sum (likely the rounding bias;
 * TODO confirm) is on a line not visible in this view.
 */
1218 uint32_t l0 = (a & 0x03030303UL) +
1219 (b & 0x03030303UL) +
1221 uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) +
1222 ((b & 0xFCFCFCFCUL) >> 2);
/* Step to the next source row before entering the row loop. */
1225 pixels += line_size;
/* i advances by 2: every iteration handles two rows. */
1226 for (i = 0; i <
h; i += 2) {
/* This b shadows the outer b; the matching reload of a is not visible here. */
1228 uint32_t b =
AV_RN32(pixels + 1);
/* Low/high partial sums for the current row (second l1 operand not visible). */
1229 l1 = (a & 0x03030303UL) +
1231 h1 = ((a & 0xFCFCFCFCUL) >> 2) +
1232 ((b & 0xFCFCFCFCUL) >> 2);
/* Output word = high sums + ((low sums >> 2) masked to 4 bits per byte). */
1233 *((uint32_t *) block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
1234 pixels += line_size;
/* Second row of the pair: refresh l0/h0 and reuse l1/h1 from above. */
1238 l0 = (a & 0x03030303UL) +
1239 (b & 0x03030303UL) +
1241 h0 = ((a & 0xFCFCFCFCUL) >> 2) +
1242 ((b & 0xFCFCFCFCUL) >> 2);
1243 *((uint32_t *) block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
1244 pixels += line_size;
/*
 * Rewind for the next column: pixels moved down h + 1 rows in total (one
 * before the loop plus one per processed row, for even h), so undo that and
 * step 4 bytes to the right.
 */
1247 pixels += 4 - line_size * (h + 1);
/*
 * Same for block, which is rewound by h rows -- this implies block is advanced
 * by line_size after each store on lines not visible in this view.
 */
1248 block += 4 - line_size *
h;
1253 ptrdiff_t line_size,
int h)
void ff_put_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
void ff_avg_pixels4_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
MIPS assembly defines from sys/asm.h but rewritten for use with C inline assembly (rather than from w...
void ff_avg_pixels4_y2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_put_pixels8_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_avg_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
void ff_put_no_rnd_pixels8_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_avg_pixels4_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_avg_pixels16_y2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_put_pixels16_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
void ff_put_no_rnd_pixels16_x2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_put_pixels16_y2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_put_no_rnd_pixels8_x2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_put_pixels4_x2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_avg_pixels8_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_put_pixels8_y2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_put_no_rnd_pixels16_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_put_pixels4_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_put_pixels4_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_avg_pixels16_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_put_no_rnd_pixels8_y2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_avg_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
void ff_put_pixels4_y2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
static uint32_t rnd_avg32(uint32_t a, uint32_t b)
void ff_avg_pixels8_y2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_avg_pixels16_x2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_avg_pixels16_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_put_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
void ff_put_pixels16_x2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_put_no_rnd_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
void ff_avg_pixels4_x2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_avg_pixels8_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_put_pixels16_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_avg_pixels8_x2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_put_pixels8_x2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_avg_pixels16_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
void ff_put_no_rnd_pixels16_y2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_put_pixels8_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_put_pixels16_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)