25 #define GET_DATA_H_MMI \ 26 "pmaddhw %[ftmp4], %[ftmp4], %[filter1] \n\t" \ 27 "pmaddhw %[ftmp5], %[ftmp5], %[filter2] \n\t" \ 28 "paddw %[ftmp4], %[ftmp4], %[ftmp5] \n\t" \ 29 "punpckhwd %[ftmp5], %[ftmp4], %[ftmp0] \n\t" \ 30 "paddw %[ftmp4], %[ftmp4], %[ftmp5] \n\t" \ 31 "pmaddhw %[ftmp6], %[ftmp6], %[filter1] \n\t" \ 32 "pmaddhw %[ftmp7], %[ftmp7], %[filter2] \n\t" \ 33 "paddw %[ftmp6], %[ftmp6], %[ftmp7] \n\t" \ 34 "punpckhwd %[ftmp7], %[ftmp6], %[ftmp0] \n\t" \ 35 "paddw %[ftmp6], %[ftmp6], %[ftmp7] \n\t" \ 36 "punpcklwd %[srcl], %[ftmp4], %[ftmp6] \n\t" \ 37 "pmaddhw %[ftmp8], %[ftmp8], %[filter1] \n\t" \ 38 "pmaddhw %[ftmp9], %[ftmp9], %[filter2] \n\t" \ 39 "paddw %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \ 40 "punpckhwd %[ftmp9], %[ftmp8], %[ftmp0] \n\t" \ 41 "paddw %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \ 42 "pmaddhw %[ftmp10], %[ftmp10], %[filter1] \n\t" \ 43 "pmaddhw %[ftmp11], %[ftmp11], %[filter2] \n\t" \ 44 "paddw %[ftmp10], %[ftmp10], %[ftmp11] \n\t" \ 45 "punpckhwd %[ftmp11], %[ftmp10], %[ftmp0] \n\t" \ 46 "paddw %[ftmp10], %[ftmp10], %[ftmp11] \n\t" \ 47 "punpcklwd %[srch], %[ftmp8], %[ftmp10] \n\t" 49 #define GET_DATA_V_MMI \ 50 "punpcklhw %[srcl], %[ftmp4], %[ftmp5] \n\t" \ 51 "pmaddhw %[srcl], %[srcl], %[filter10] \n\t" \ 52 "punpcklhw %[ftmp12], %[ftmp6], %[ftmp7] \n\t" \ 53 "pmaddhw %[ftmp12], %[ftmp12], %[filter32] \n\t" \ 54 "paddw %[srcl], %[srcl], %[ftmp12] \n\t" \ 55 "punpcklhw %[ftmp12], %[ftmp8], %[ftmp9] \n\t" \ 56 "pmaddhw %[ftmp12], %[ftmp12], %[filter54] \n\t" \ 57 "paddw %[srcl], %[srcl], %[ftmp12] \n\t" \ 58 "punpcklhw %[ftmp12], %[ftmp10], %[ftmp11] \n\t" \ 59 "pmaddhw %[ftmp12], %[ftmp12], %[filter76] \n\t" \ 60 "paddw %[srcl], %[srcl], %[ftmp12] \n\t" \ 61 "punpckhhw %[srch], %[ftmp4], %[ftmp5] \n\t" \ 62 "pmaddhw %[srch], %[srch], %[filter10] \n\t" \ 63 "punpckhhw %[ftmp12], %[ftmp6], %[ftmp7] \n\t" \ 64 "pmaddhw %[ftmp12], %[ftmp12], %[filter32] \n\t" \ 65 "paddw %[srch], %[srch], %[ftmp12] \n\t" \ 66 "punpckhhw %[ftmp12], %[ftmp8], 
%[ftmp9] \n\t" \ 67 "pmaddhw %[ftmp12], %[ftmp12], %[filter54] \n\t" \ 68 "paddw %[srch], %[srch], %[ftmp12] \n\t" \ 69 "punpckhhw %[ftmp12], %[ftmp10], %[ftmp11] \n\t" \ 70 "pmaddhw %[ftmp12], %[ftmp12], %[filter76] \n\t" \ 71 "paddw %[srch], %[srch], %[ftmp12] \n\t" 84 "move %[tmp1], %[width] \n\t" 85 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 86 "gsldlc1 %[filter1], 0x03(%[filter]) \n\t" 87 "gsldrc1 %[filter1], 0x00(%[filter]) \n\t" 88 "gsldlc1 %[filter2], 0x0b(%[filter]) \n\t" 89 "gsldrc1 %[filter2], 0x08(%[filter]) \n\t" 90 "li %[tmp0], 0x07 \n\t" 91 "dmtc1 %[tmp0], %[ftmp13] \n\t" 92 "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t" 95 "gsldlc1 %[ftmp5], 0x07(%[src]) \n\t" 96 "gsldrc1 %[ftmp5], 0x00(%[src]) \n\t" 97 "gsldlc1 %[ftmp7], 0x08(%[src]) \n\t" 98 "gsldrc1 %[ftmp7], 0x01(%[src]) \n\t" 99 "gsldlc1 %[ftmp9], 0x09(%[src]) \n\t" 100 "gsldrc1 %[ftmp9], 0x02(%[src]) \n\t" 101 "gsldlc1 %[ftmp11], 0x0A(%[src]) \n\t" 102 "gsldrc1 %[ftmp11], 0x03(%[src]) \n\t" 103 "punpcklbh %[ftmp4], %[ftmp5], %[ftmp0] \n\t" 104 "punpckhbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 105 "punpcklbh %[ftmp6], %[ftmp7], %[ftmp0] \n\t" 106 "punpckhbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t" 107 "punpcklbh %[ftmp8], %[ftmp9], %[ftmp0] \n\t" 108 "punpckhbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 109 "punpcklbh %[ftmp10], %[ftmp11], %[ftmp0] \n\t" 110 "punpckhbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t" 111 PTR_ADDIU "%[width], %[width], -0x04 \n\t" 118 "packsswh %[srcl], %[srcl], %[srch] \n\t" 119 "packushb %[ftmp12], %[srcl], %[ftmp0] \n\t" 120 "swc1 %[ftmp12], 0x00(%[dst]) \n\t" 124 "bnez %[width], 1b \n\t" 125 "move %[width], %[tmp1] \n\t" 126 PTR_ADDU "%[src], %[src], %[src_stride] \n\t" 127 PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t" 128 PTR_ADDIU "%[height], %[height], -0x01 \n\t" 129 "bnez %[height], 1b \n\t" 130 : [srcl]
"=&f"(ftmp[0]), [srch]
"=&f"(ftmp[1]),
131 [
filter1]
"=&f"(ftmp[2]), [filter2]
"=&f"(ftmp[3]),
132 [ftmp0]
"=&f"(ftmp[4]), [ftmp4]
"=&f"(ftmp[5]),
133 [ftmp5]
"=&f"(ftmp[6]), [ftmp6]
"=&f"(ftmp[7]),
134 [ftmp7]
"=&f"(ftmp[8]), [ftmp8]
"=&f"(ftmp[9]),
135 [ftmp9]
"=&f"(ftmp[10]), [ftmp10]
"=&f"(ftmp[11]),
136 [ftmp11]
"=&f"(ftmp[12]), [ftmp12]
"=&f"(ftmp[13]),
137 [tmp0]
"=&r"(tmp[0]), [tmp1]
"=&r"(tmp[1]),
139 [dst]
"+&r"(dst), [
height]
"+&r"(h),
140 [ftmp13]
"=&f"(ftmp[14])
142 [src_stride]
"r"((
mips_reg)src_stride),
143 [dst_stride]
"r"((
mips_reg)dst_stride)
155 ptrdiff_t addr = src_stride;
160 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 161 "gsldlc1 %[ftmp4], 0x03(%[filter]) \n\t" 162 "gsldrc1 %[ftmp4], 0x00(%[filter]) \n\t" 163 "gsldlc1 %[ftmp5], 0x0b(%[filter]) \n\t" 164 "gsldrc1 %[ftmp5], 0x08(%[filter]) \n\t" 165 "punpcklwd %[filter10], %[ftmp4], %[ftmp4] \n\t" 166 "punpckhwd %[filter32], %[ftmp4], %[ftmp4] \n\t" 167 "punpcklwd %[filter54], %[ftmp5], %[ftmp5] \n\t" 168 "punpckhwd %[filter76], %[ftmp5], %[ftmp5] \n\t" 169 "li %[tmp0], 0x07 \n\t" 170 "dmtc1 %[tmp0], %[ftmp13] \n\t" 171 "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t" 174 "gsldlc1 %[ftmp4], 0x07(%[src]) \n\t" 175 "gsldrc1 %[ftmp4], 0x00(%[src]) \n\t" 176 PTR_ADDU "%[tmp0], %[src], %[addr] \n\t" 177 "gsldlc1 %[ftmp5], 0x07(%[tmp0]) \n\t" 178 "gsldrc1 %[ftmp5], 0x00(%[tmp0]) \n\t" 179 PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t" 180 "gsldlc1 %[ftmp6], 0x07(%[tmp0]) \n\t" 181 "gsldrc1 %[ftmp6], 0x00(%[tmp0]) \n\t" 182 PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t" 183 "gsldlc1 %[ftmp7], 0x07(%[tmp0]) \n\t" 184 "gsldrc1 %[ftmp7], 0x00(%[tmp0]) \n\t" 185 PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t" 186 "gsldlc1 %[ftmp8], 0x07(%[tmp0]) \n\t" 187 "gsldrc1 %[ftmp8], 0x00(%[tmp0]) \n\t" 188 PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t" 189 "gsldlc1 %[ftmp9], 0x07(%[tmp0]) \n\t" 190 "gsldrc1 %[ftmp9], 0x00(%[tmp0]) \n\t" 191 PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t" 192 "gsldlc1 %[ftmp10], 0x07(%[tmp0]) \n\t" 193 "gsldrc1 %[ftmp10], 0x00(%[tmp0]) \n\t" 194 PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t" 195 "gsldlc1 %[ftmp11], 0x07(%[tmp0]) \n\t" 196 "gsldrc1 %[ftmp11], 0x00(%[tmp0]) \n\t" 197 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 198 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 199 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 200 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t" 201 "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t" 202 "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 203 "punpcklbh %[ftmp10], %[ftmp10], %[ftmp0] \n\t" 204 "punpcklbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t" 205 PTR_ADDIU "%[width], 
%[width], -0x04 \n\t" 212 "packsswh %[srcl], %[srcl], %[srch] \n\t" 213 "packushb %[ftmp12], %[srcl], %[ftmp0] \n\t" 214 "swc1 %[ftmp12], 0x00(%[dst]) \n\t" 218 "bnez %[width], 1b \n\t" 219 PTR_SUBU "%[width], %[addr], %[src_stride] \n\t" 220 PTR_ADDU "%[src], %[src], %[src_stride] \n\t" 221 PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t" 222 PTR_ADDIU "%[height], %[height], -0x01 \n\t" 223 "bnez %[height], 1b \n\t" 224 : [srcl]
"=&f"(ftmp[0]), [srch]
"=&f"(ftmp[1]),
225 [filter10]
"=&f"(ftmp[2]), [filter32]
"=&f"(ftmp[3]),
226 [filter54]
"=&f"(ftmp[4]), [filter76]
"=&f"(ftmp[5]),
227 [ftmp0]
"=&f"(ftmp[6]), [ftmp4]
"=&f"(ftmp[7]),
228 [ftmp5]
"=&f"(ftmp[8]), [ftmp6]
"=&f"(ftmp[9]),
229 [ftmp7]
"=&f"(ftmp[10]), [ftmp8]
"=&f"(ftmp[11]),
230 [ftmp9]
"=&f"(ftmp[12]), [ftmp10]
"=&f"(ftmp[13]),
231 [ftmp11]
"=&f"(ftmp[14]), [ftmp12]
"=&f"(ftmp[15]),
232 [
src]
"+&r"(
src), [dst]
"+&r"(dst),
234 [tmp0]
"=&r"(tmp[0]), [ftmp13]
"=&f"(ftmp[16])
236 [src_stride]
"r"((
mips_reg)src_stride),
237 [dst_stride]
"r"((
mips_reg)dst_stride),
245 const uint16_t *filter_x,
int32_t w,
255 "move %[tmp1], %[width] \n\t" 256 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 257 "gsldlc1 %[filter1], 0x03(%[filter]) \n\t" 258 "gsldrc1 %[filter1], 0x00(%[filter]) \n\t" 259 "gsldlc1 %[filter2], 0x0b(%[filter]) \n\t" 260 "gsldrc1 %[filter2], 0x08(%[filter]) \n\t" 261 "li %[tmp0], 0x07 \n\t" 262 "dmtc1 %[tmp0], %[ftmp13] \n\t" 263 "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t" 266 "gsldlc1 %[ftmp5], 0x07(%[src]) \n\t" 267 "gsldrc1 %[ftmp5], 0x00(%[src]) \n\t" 268 "gsldlc1 %[ftmp7], 0x08(%[src]) \n\t" 269 "gsldrc1 %[ftmp7], 0x01(%[src]) \n\t" 270 "gsldlc1 %[ftmp9], 0x09(%[src]) \n\t" 271 "gsldrc1 %[ftmp9], 0x02(%[src]) \n\t" 272 "gsldlc1 %[ftmp11], 0x0A(%[src]) \n\t" 273 "gsldrc1 %[ftmp11], 0x03(%[src]) \n\t" 274 "punpcklbh %[ftmp4], %[ftmp5], %[ftmp0] \n\t" 275 "punpckhbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 276 "punpcklbh %[ftmp6], %[ftmp7], %[ftmp0] \n\t" 277 "punpckhbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t" 278 "punpcklbh %[ftmp8], %[ftmp9], %[ftmp0] \n\t" 279 "punpckhbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 280 "punpcklbh %[ftmp10], %[ftmp11], %[ftmp0] \n\t" 281 "punpckhbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t" 282 PTR_ADDIU "%[width], %[width], -0x04 \n\t" 289 "packsswh %[srcl], %[srcl], %[srch] \n\t" 290 "packushb %[ftmp12], %[srcl], %[ftmp0] \n\t" 291 "punpcklbh %[ftmp12], %[ftmp12], %[ftmp0] \n\t" 292 "gsldlc1 %[ftmp4], 0x07(%[dst]) \n\t" 293 "gsldrc1 %[ftmp4], 0x00(%[dst]) \n\t" 294 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 295 "paddh %[ftmp12], %[ftmp12], %[ftmp4] \n\t" 296 "li %[tmp0], 0x10001 \n\t" 297 "dmtc1 %[tmp0], %[ftmp5] \n\t" 298 "punpcklhw %[ftmp5], %[ftmp5], %[ftmp5] \n\t" 299 "paddh %[ftmp12], %[ftmp12], %[ftmp5] \n\t" 300 "psrah %[ftmp12], %[ftmp12], %[ftmp5] \n\t" 301 "packushb %[ftmp12], %[ftmp12], %[ftmp0] \n\t" 302 "swc1 %[ftmp12], 0x00(%[dst]) \n\t" 306 "bnez %[width], 1b \n\t" 307 "move %[width], %[tmp1] \n\t" 308 PTR_ADDU "%[src], %[src], %[src_stride] \n\t" 309 PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t" 310 PTR_ADDIU "%[height], 
%[height], -0x01 \n\t" 311 "bnez %[height], 1b \n\t" 312 : [srcl]
"=&f"(ftmp[0]), [srch]
"=&f"(ftmp[1]),
313 [
filter1]
"=&f"(ftmp[2]), [filter2]
"=&f"(ftmp[3]),
314 [ftmp0]
"=&f"(ftmp[4]), [ftmp4]
"=&f"(ftmp[5]),
315 [ftmp5]
"=&f"(ftmp[6]), [ftmp6]
"=&f"(ftmp[7]),
316 [ftmp7]
"=&f"(ftmp[8]), [ftmp8]
"=&f"(ftmp[9]),
317 [ftmp9]
"=&f"(ftmp[10]), [ftmp10]
"=&f"(ftmp[11]),
318 [ftmp11]
"=&f"(ftmp[12]), [ftmp12]
"=&f"(ftmp[13]),
319 [tmp0]
"=&r"(tmp[0]), [tmp1]
"=&r"(tmp[1]),
321 [dst]
"+&r"(dst), [
height]
"+&r"(h),
322 [ftmp13]
"=&f"(ftmp[14])
324 [src_stride]
"r"((
mips_reg)src_stride),
325 [dst_stride]
"r"((
mips_reg)dst_stride)
337 ptrdiff_t addr = src_stride;
342 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 343 "gsldlc1 %[ftmp4], 0x03(%[filter]) \n\t" 344 "gsldrc1 %[ftmp4], 0x00(%[filter]) \n\t" 345 "gsldlc1 %[ftmp5], 0x0b(%[filter]) \n\t" 346 "gsldrc1 %[ftmp5], 0x08(%[filter]) \n\t" 347 "punpcklwd %[filter10], %[ftmp4], %[ftmp4] \n\t" 348 "punpckhwd %[filter32], %[ftmp4], %[ftmp4] \n\t" 349 "punpcklwd %[filter54], %[ftmp5], %[ftmp5] \n\t" 350 "punpckhwd %[filter76], %[ftmp5], %[ftmp5] \n\t" 351 "li %[tmp0], 0x07 \n\t" 352 "dmtc1 %[tmp0], %[ftmp13] \n\t" 353 "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t" 356 "gsldlc1 %[ftmp4], 0x07(%[src]) \n\t" 357 "gsldrc1 %[ftmp4], 0x00(%[src]) \n\t" 358 PTR_ADDU "%[tmp0], %[src], %[addr] \n\t" 359 "gsldlc1 %[ftmp5], 0x07(%[tmp0]) \n\t" 360 "gsldrc1 %[ftmp5], 0x00(%[tmp0]) \n\t" 361 PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t" 362 "gsldlc1 %[ftmp6], 0x07(%[tmp0]) \n\t" 363 "gsldrc1 %[ftmp6], 0x00(%[tmp0]) \n\t" 364 PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t" 365 "gsldlc1 %[ftmp7], 0x07(%[tmp0]) \n\t" 366 "gsldrc1 %[ftmp7], 0x00(%[tmp0]) \n\t" 367 PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t" 368 "gsldlc1 %[ftmp8], 0x07(%[tmp0]) \n\t" 369 "gsldrc1 %[ftmp8], 0x00(%[tmp0]) \n\t" 370 PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t" 371 "gsldlc1 %[ftmp9], 0x07(%[tmp0]) \n\t" 372 "gsldrc1 %[ftmp9], 0x00(%[tmp0]) \n\t" 373 PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t" 374 "gsldlc1 %[ftmp10], 0x07(%[tmp0]) \n\t" 375 "gsldrc1 %[ftmp10], 0x00(%[tmp0]) \n\t" 376 PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t" 377 "gsldlc1 %[ftmp11], 0x07(%[tmp0]) \n\t" 378 "gsldrc1 %[ftmp11], 0x00(%[tmp0]) \n\t" 379 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 380 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 381 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 382 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t" 383 "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t" 384 "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 385 "punpcklbh %[ftmp10], %[ftmp10], %[ftmp0] \n\t" 386 "punpcklbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t" 387 PTR_ADDIU "%[width], 
%[width], -0x04 \n\t" 394 "packsswh %[srcl], %[srcl], %[srch] \n\t" 395 "packushb %[ftmp12], %[srcl], %[ftmp0] \n\t" 396 "punpcklbh %[ftmp12], %[ftmp12], %[ftmp0] \n\t" 397 "gsldlc1 %[ftmp4], 0x07(%[dst]) \n\t" 398 "gsldrc1 %[ftmp4], 0x00(%[dst]) \n\t" 399 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 400 "paddh %[ftmp12], %[ftmp12], %[ftmp4] \n\t" 401 "li %[tmp0], 0x10001 \n\t" 402 "dmtc1 %[tmp0], %[ftmp5] \n\t" 403 "punpcklhw %[ftmp5], %[ftmp5], %[ftmp5] \n\t" 404 "paddh %[ftmp12], %[ftmp12], %[ftmp5] \n\t" 405 "psrah %[ftmp12], %[ftmp12], %[ftmp5] \n\t" 406 "packushb %[ftmp12], %[ftmp12], %[ftmp0] \n\t" 407 "swc1 %[ftmp12], 0x00(%[dst]) \n\t" 411 "bnez %[width], 1b \n\t" 412 PTR_SUBU "%[width], %[addr], %[src_stride] \n\t" 413 PTR_ADDU "%[src], %[src], %[src_stride] \n\t" 414 PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t" 415 PTR_ADDIU "%[height], %[height], -0x01 \n\t" 416 "bnez %[height], 1b \n\t" 417 : [srcl]
"=&f"(ftmp[0]), [srch]
"=&f"(ftmp[1]),
418 [filter10]
"=&f"(ftmp[2]), [filter32]
"=&f"(ftmp[3]),
419 [filter54]
"=&f"(ftmp[4]), [filter76]
"=&f"(ftmp[5]),
420 [ftmp0]
"=&f"(ftmp[6]), [ftmp4]
"=&f"(ftmp[7]),
421 [ftmp5]
"=&f"(ftmp[8]), [ftmp6]
"=&f"(ftmp[9]),
422 [ftmp7]
"=&f"(ftmp[10]), [ftmp8]
"=&f"(ftmp[11]),
423 [ftmp9]
"=&f"(ftmp[12]), [ftmp10]
"=&f"(ftmp[13]),
424 [ftmp11]
"=&f"(ftmp[14]), [ftmp12]
"=&f"(ftmp[15]),
425 [
src]
"+&r"(
src), [dst]
"+&r"(dst),
427 [tmp0]
"=&r"(tmp[0]), [ftmp13]
"=&f"(ftmp[16])
429 [src_stride]
"r"((
mips_reg)src_stride),
430 [dst_stride]
"r"((
mips_reg)dst_stride),
446 "move %[tmp1], %[width] \n\t" 447 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 448 "li %[tmp0], 0x10001 \n\t" 449 "dmtc1 %[tmp0], %[ftmp3] \n\t" 450 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" 452 "gslwlc1 %[ftmp1], 0x07(%[src]) \n\t" 453 "gslwrc1 %[ftmp1], 0x00(%[src]) \n\t" 454 "gslwlc1 %[ftmp2], 0x07(%[dst]) \n\t" 455 "gslwrc1 %[ftmp2], 0x00(%[dst]) \n\t" 456 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 457 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 458 "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 459 "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 460 "psrah %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 461 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 462 "swc1 %[ftmp1], 0x00(%[dst]) \n\t" 463 PTR_ADDIU "%[width], %[width], -0x04 \n\t" 466 "bnez %[width], 1b \n\t" 467 "move %[width], %[tmp1] \n\t" 468 PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t" 469 PTR_ADDU "%[src], %[src], %[src_stride] \n\t" 470 PTR_ADDIU "%[height], %[height], -0x01 \n\t" 471 "bnez %[height], 1b \n\t" 472 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
473 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
474 [tmp0]
"=&r"(tmp[0]), [tmp1]
"=&r"(tmp[1]),
475 [
src]
"+&r"(
src), [dst]
"+&r"(dst),
477 : [src_stride]
"r"((
mips_reg)src_stride),
478 [dst_stride]
"r"((
mips_reg)dst_stride)
485 {0, 1, -5, 126, 8, -3, 1, 0},
486 {-1, 3, -10, 122, 18, -6, 2, 0},
487 {-1, 4, -13, 118, 27, -9, 3, -1},
488 {-1, 4, -16, 112, 37, -11, 4, -1},
489 {-1, 5, -18, 105, 48, -14, 4, -1},
490 {-1, 5, -19, 97, 58, -16, 5, -1},
491 {-1, 6, -19, 88, 68, -18, 5, -1},
492 {-1, 6, -19, 78, 78, -19, 6, -1},
493 {-1, 5, -18, 68, 88, -19, 6, -1},
494 {-1, 5, -16, 58, 97, -19, 5, -1},
495 {-1, 4, -14, 48, 105, -18, 5, -1},
496 {-1, 4, -11, 37, 112, -16, 4, -1},
497 {-1, 3, -9, 27, 118, -13, 4, -1},
498 {0, 2, -6, 18, 122, -10, 3, -1},
499 {0, 1, -3, 8, 126, -5, 1, 0},
501 {-1, 3, -7, 127, 8, -3, 1, 0},
502 {-2, 5, -13, 125, 17, -6, 3, -1},
503 {-3, 7, -17, 121, 27, -10, 5, -2},
504 {-4, 9, -20, 115, 37, -13, 6, -2},
505 {-4, 10, -23, 108, 48, -16, 8, -3},
506 {-4, 10, -24, 100, 59, -19, 9, -3},
507 {-4, 11, -24, 90, 70, -21, 10, -4},
508 {-4, 11, -23, 80, 80, -23, 11, -4},
509 {-4, 10, -21, 70, 90, -24, 11, -4},
510 {-3, 9, -19, 59, 100, -24, 10, -4},
511 {-3, 8, -16, 48, 108, -23, 10, -4},
512 {-2, 6, -13, 37, 115, -20, 9, -4},
513 {-2, 5, -10, 27, 121, -17, 7, -3},
514 {-1, 3, -6, 17, 125, -13, 5, -2},
515 {0, 1, -3, 8, 127, -7, 3, -1},
517 {-3, -1, 32, 64, 38, 1, -3, 0},
518 {-2, -2, 29, 63, 41, 2, -3, 0},
519 {-2, -2, 26, 63, 43, 4, -4, 0},
520 {-2, -3, 24, 62, 46, 5, -4, 0},
521 {-2, -3, 21, 60, 49, 7, -4, 0},
522 {-1, -4, 18, 59, 51, 9, -4, 0},
523 {-1, -4, 16, 57, 53, 12, -4, -1},
524 {-1, -4, 14, 55, 55, 14, -4, -1},
525 {-1, -4, 12, 53, 57, 16, -4, -1},
526 {0, -4, 9, 51, 59, 18, -4, -1},
527 {0, -4, 7, 49, 60, 21, -3, -2},
528 {0, -4, 5, 46, 62, 24, -3, -2},
529 {0, -4, 4, 43, 63, 26, -2, -2},
530 {0, -3, 2, 41, 63, 29, -2, -2},
531 {0, -3, 1, 38, 64, 32, -1, -3},
535 #define VP9_8TAP_MIPS_MMI_FUNC(SIZE, TYPE, TYPE_IDX) \ 536 void ff_put_8tap_##TYPE##_##SIZE##h_mmi(uint8_t *dst, ptrdiff_t dststride, \ 537 const uint8_t *src, \ 538 ptrdiff_t srcstride, \ 539 int h, int mx, int my) \ 541 const int16_t *filter = vp9_subpel_filters_mmi[TYPE_IDX][mx-1]; \ 543 convolve_horiz_mmi(src, srcstride, dst, dststride, filter, SIZE, h); \ 546 void ff_put_8tap_##TYPE##_##SIZE##v_mmi(uint8_t *dst, ptrdiff_t dststride, \ 547 const uint8_t *src, \ 548 ptrdiff_t srcstride, \ 549 int h, int mx, int my) \ 551 const int16_t *filter = vp9_subpel_filters_mmi[TYPE_IDX][my-1]; \ 553 src -= (3 * srcstride); \ 554 convolve_vert_mmi(src, srcstride, dst, dststride, filter, SIZE, h); \ 557 void ff_put_8tap_##TYPE##_##SIZE##hv_mmi(uint8_t *dst, ptrdiff_t dststride, \ 558 const uint8_t *src, \ 559 ptrdiff_t srcstride, \ 560 int h, int mx, int my) \ 562 const uint16_t *hfilter = vp9_subpel_filters_mmi[TYPE_IDX][mx-1]; \ 563 const uint16_t *vfilter = vp9_subpel_filters_mmi[TYPE_IDX][my-1]; \ 566 uint8_t temp[64 * 71]; \ 567 src -= (3 * srcstride); \ 568 convolve_horiz_mmi(src, srcstride, temp, 64, hfilter, SIZE, tmp_h); \ 569 convolve_vert_mmi(temp, 64, dst, dststride, vfilter, SIZE, h); \ 572 void ff_avg_8tap_##TYPE##_##SIZE##h_mmi(uint8_t *dst, ptrdiff_t dststride, \ 573 const uint8_t *src, \ 574 ptrdiff_t srcstride, \ 575 int h, int mx, int my) \ 577 const int16_t *filter = vp9_subpel_filters_mmi[TYPE_IDX][mx-1]; \ 579 convolve_avg_horiz_mmi(src, srcstride, dst, dststride, filter, SIZE, h); \ 582 void ff_avg_8tap_##TYPE##_##SIZE##v_mmi(uint8_t *dst, ptrdiff_t dststride, \ 583 const uint8_t *src, \ 584 ptrdiff_t srcstride, \ 585 int h, int mx, int my) \ 587 const int16_t *filter = vp9_subpel_filters_mmi[TYPE_IDX][my-1]; \ 589 src -= (3 * srcstride); \ 590 convolve_avg_vert_mmi(src, srcstride, dst, dststride, filter, SIZE, h); \ 593 void ff_avg_8tap_##TYPE##_##SIZE##hv_mmi(uint8_t *dst, ptrdiff_t dststride, \ 594 const uint8_t *src, \ 595 ptrdiff_t srcstride, 
\ 596 int h, int mx, int my) \ 598 const uint16_t *hfilter = vp9_subpel_filters_mmi[TYPE_IDX][mx-1]; \ 599 const uint16_t *vfilter = vp9_subpel_filters_mmi[TYPE_IDX][my-1]; \ 601 uint8_t temp1[64 * 64]; \ 602 uint8_t temp2[64 * 71]; \ 604 src -= (3 * srcstride); \ 605 convolve_horiz_mmi(src, srcstride, temp2, 64, hfilter, SIZE, tmp_h); \ 606 convolve_vert_mmi(temp2, 64, temp1, 64, vfilter, SIZE, h); \ 607 convolve_avg_mmi(temp1, 64, dst, dststride, SIZE, h); \ 628 #undef VP9_8TAP_MIPS_MMI_FUNC static void convolve_avg_horiz_mmi(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const uint16_t *filter_x, int32_t w, int32_t h)
#define ROUND_POWER_OF_TWO_MMI(fr_i0, fr_i1, fr_t0, fr_t1, gr_t0)
brief: (((value) + (1 << ((n) - 1))) >> (n)) fr_i0: src & dst fr_i1: Operand number fr_t0...
static void convolve_avg_vert_mmi(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_y, int32_t w, int32_t h)
#define VP9_8TAP_MIPS_MMI_FUNC(SIZE, TYPE, TYPE_IDX)
static void filter1(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
static void convolve_vert_mmi(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_y, int32_t w, int32_t h)
static const int16_t vp9_subpel_filters_mmi[3][15][8]
filter_frame: For filters that do not use the activate() callback, this method is called when a frame is pushed to the filter's input. It can be called at any time except in a reentrant way. If the input frame is enough to produce output, then the filter should push the output frames on the output link immediately. As an exception to the previous rule, if the input frame is enough to produce several output frames, then the filter only needs to output at least one frame per link. The additional frames can be left buffered in the filter.
static float smooth(DeshakeOpenCLContext *deshake_ctx, float *gauss_kernel, int length, float max_val, AVFifoBuffer *values)
__asm__(".macro parse_r var r\n\t""\\var = -1\n\t"_IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31)".iflt \\var\n\t"".error \"Unable to parse register name \\r\"\n\t"".endif\n\t"".endm")
static void convolve_horiz_mmi(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const uint16_t *filter_x, int32_t w, int32_t h)
static void convolve_avg_mmi(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t w, int32_t h)