60 13036, 13036, 13036, 13036,
61 27146, 27146, 27146, 27146,
62 -21746, -21746, -21746, -21746,
63 23170, 23170, 23170, 23170
138 16384, 16384, 16384, -16384,
139 21407, 8867, 8867, -21407,
140 16384, -16384, 16384, 16384,
141 -8867, 21407, -21407, -8867,
142 22725, 12873, 19266, -22725,
143 19266, 4520, -4520, -12873,
144 12873, 4520, 4520, 19266,
145 -22725, 19266, -12873, -22725,
147 22725, 22725, 22725, -22725,
148 29692, 12299, 12299, -29692,
149 22725, -22725, 22725, 22725,
150 -12299, 29692, -29692, -12299,
151 31521, 17855, 26722, -31521,
152 26722, 6270, -6270, -17855,
153 17855, 6270, 6270, 26722,
154 -31521, 26722, -17855, -31521,
156 21407, 21407, 21407, -21407,
157 27969, 11585, 11585, -27969,
158 21407, -21407, 21407, 21407,
159 -11585, 27969, -27969, -11585,
160 29692, 16819, 25172, -29692,
161 25172, 5906, -5906, -16819,
162 16819, 5906, 5906, 25172,
163 -29692, 25172, -16819, -29692,
165 19266, 19266, 19266, -19266,
166 25172, 10426, 10426, -25172,
167 19266, -19266, 19266, 19266,
168 -10426, 25172, -25172, -10426,
169 26722, 15137, 22654, -26722,
170 22654, 5315, -5315, -15137,
171 15137, 5315, 5315, 22654,
172 -26722, 22654, -15137, -26722,
180 16384, 21407, 16384, 8867,
181 16384, 8867, -16384, -21407,
182 16384, -8867, 16384, -21407,
183 -16384, 21407, 16384, -8867,
184 22725, 19266, 19266, -4520,
185 12873, 4520, -22725, -12873,
186 12873, -22725, 4520, -12873,
187 4520, 19266, 19266, -22725,
189 22725, 29692, 22725, 12299,
190 22725, 12299, -22725, -29692,
191 22725, -12299, 22725, -29692,
192 -22725, 29692, 22725, -12299,
193 31521, 26722, 26722, -6270,
194 17855, 6270, -31521, -17855,
195 17855, -31521, 6270, -17855,
196 6270, 26722, 26722, -31521,
198 21407, 27969, 21407, 11585,
199 21407, 11585, -21407, -27969,
200 21407, -11585, 21407, -27969,
201 -21407, 27969, 21407, -11585,
202 29692, 25172, 25172, -5906,
203 16819, 5906, -29692, -16819,
204 16819, -29692, 5906, -16819,
205 5906, 25172, 25172, -29692,
207 19266, 25172, 19266, 10426,
208 19266, 10426, -19266, -25172,
209 19266, -10426, 19266, -25172,
210 -19266, 25172, 19266, -10426,
211 26722, 22654, 22654, -5315,
212 15137, 5315, -26722, -15137,
213 15137, -26722, 5315, -15137,
214 5315, 22654, 22654, -26722,
/*
 * DCT_8_INV_ROW_MMX(A1, A2, A3, A4)
 *
 * Expands to a sequence of string literals (extended inline-asm text, AT&T
 * syntax, registers escaped as %%mmN) that processes one row of eight
 * 16-bit values using plain MMX instructions.
 *
 *   A1  memory operand of the input row: x0..x3 are the words at A1,
 *       x4..x7 the words at 8+A1.
 *   A2  memory operand of the 16-byte output row (the visible callers pass
 *       the same operand as A1, i.e. the row is transformed in place).
 *   A3  memory operand of a 64-byte table of 16-bit multipliers, read as
 *       eight quadwords; T[n] below means the quadword at byte offset n.
 *   A4  memory operand of a quadword of two 32-bit values that is added to
 *       both even-input sums before the shift (callers pass a "rounder").
 *
 * Notation: lanes are listed high-to-low.  "(p q) . T[n]" is pmaddwd: each
 * 32-bit lane is the sum of two signed 16x16 products.  a0..a3 are sums
 * built from the even-indexed inputs (x0, x2, x4, x6) plus A4; b0..b3 are
 * sums built from the odd-indexed inputs (x1, x3, x5, x7).
 *
 * Result: y[k] = (a[k] + b[k]) >> 11 and y[7-k] = (a[k] - b[k]) >> 11 for
 * k = 0..3, packed to 16 bits with signed saturation.
 *
 * Overwrites mm0-mm7.
 *
 * NOTE(review): the last line of the macro ends with a continuation
 * backslash, so whatever line follows the macro in the file is spliced
 * into it; that line must be blank or a comment -- confirm against the
 * full file.
 */
224 #define DCT_8_INV_ROW_MMX(A1, A2, A3, A4) \
225 "movq "#A1", %%mm0 \n\t" /* mm0 = x3 x2 x1 x0 */ \
226 "movq 8+"#A1", %%mm1 \n\t" /* mm1 = x7 x6 x5 x4 */ \
227 "movq %%mm0, %%mm2 \n\t" /* mm2 = x3 x2 x1 x0 (copy) */ \
228 "movq "#A3", %%mm3 \n\t" /* mm3 = T[0] */ \
229 "punpcklwd %%mm1, %%mm0 \n\t" /* mm0 = x5 x1 x4 x0 */ \
230 "movq %%mm0, %%mm5 \n\t" /* mm5 = x5 x1 x4 x0 (copy) */ \
231 "punpckldq %%mm0, %%mm0 \n\t" /* mm0 = x4 x0 x4 x0 */ \
232 "movq 8+"#A3", %%mm4 \n\t" /* mm4 = T[8] */ \
233 "punpckhwd %%mm1, %%mm2 \n\t" /* mm2 = x7 x3 x6 x2 */ \
234 "pmaddwd %%mm0, %%mm3 \n\t" /* mm3 = (x4 x0) . T[0] */ \
235 "movq %%mm2, %%mm6 \n\t" /* mm6 = x7 x3 x6 x2 (copy) */ \
236 "movq 32+"#A3", %%mm1 \n\t" /* mm1 = T[32] */ \
237 "punpckldq %%mm2, %%mm2 \n\t" /* mm2 = x6 x2 x6 x2 */ \
238 "pmaddwd %%mm2, %%mm4 \n\t" /* mm4 = (x6 x2) . T[8] */ \
239 "punpckhdq %%mm5, %%mm5 \n\t" /* mm5 = x5 x1 x5 x1 */ \
240 "pmaddwd 16+"#A3", %%mm0 \n\t" /* mm0 = (x4 x0) . T[16] */ \
241 "punpckhdq %%mm6, %%mm6 \n\t" /* mm6 = x7 x3 x7 x3 */ \
242 "movq 40+ "#A3", %%mm7 \n\t" /* mm7 = T[40] */ \
243 "pmaddwd %%mm5, %%mm1 \n\t" /* mm1 = (x5 x1) . T[32] */ \
244 "paddd "#A4", %%mm3 \n\t" /* add the A4 quadword to the first even-input partial sum */ \
245 "pmaddwd %%mm6, %%mm7 \n\t" /* mm7 = (x7 x3) . T[40] */ \
246 "pmaddwd 24+"#A3", %%mm2 \n\t" /* mm2 = (x6 x2) . T[24] */ \
247 "paddd %%mm4, %%mm3 \n\t" /* mm3 = a1 a0 */ \
248 "pmaddwd 48+"#A3", %%mm5 \n\t" /* mm5 = (x5 x1) . T[48] */ \
249 "movq %%mm3, %%mm4 \n\t" /* mm4 = a1 a0 (copy, needed for the a+b half) */ \
250 "pmaddwd 56+"#A3", %%mm6 \n\t" /* mm6 = (x7 x3) . T[56] */ \
251 "paddd %%mm7, %%mm1 \n\t" /* mm1 = b1 b0 */ \
252 "paddd "#A4", %%mm0 \n\t" /* add the A4 quadword to the second even-input partial sum */ \
253 "psubd %%mm1, %%mm3 \n\t" /* mm3 = a1-b1 a0-b0 */ \
254 "psrad $11, %%mm3 \n\t" /* arithmetic >> 11: mm3 = y6 y7 (32-bit) */ \
255 "paddd %%mm4, %%mm1 \n\t" /* mm1 = a1+b1 a0+b0 */ \
256 "paddd %%mm2, %%mm0 \n\t" /* mm0 = a3 a2 */ \
257 "psrad $11, %%mm1 \n\t" /* mm1 = y1 y0 (32-bit) */ \
258 "paddd %%mm6, %%mm5 \n\t" /* mm5 = b3 b2 */ \
259 "movq %%mm0, %%mm4 \n\t" /* mm4 = a3 a2 (copy) */ \
260 "paddd %%mm5, %%mm0 \n\t" /* mm0 = a3+b3 a2+b2 */ \
261 "psubd %%mm5, %%mm4 \n\t" /* mm4 = a3-b3 a2-b2 */ \
262 "psrad $11, %%mm0 \n\t" /* mm0 = y3 y2 (32-bit) */ \
263 "psrad $11, %%mm4 \n\t" /* mm4 = y4 y5 (32-bit) */ \
264 "packssdw %%mm0, %%mm1 \n\t" /* mm1 = y3 y2 y1 y0, saturated to 16 bits */ \
265 "packssdw %%mm3, %%mm4 \n\t" /* mm4 = y6 y7 y4 y5, saturated to 16 bits */ \
266 "movq %%mm4, %%mm7 \n\t" /* mm7 = y6 y7 y4 y5 (copy) */ \
267 "psrld $16, %%mm4 \n\t" /* mm4 = 0 y6 0 y4 */ \
268 "pslld $16, %%mm7 \n\t" /* mm7 = y7 0 y5 0 */ \
269 "movq %%mm1, "#A2" \n\t" /* store y3 y2 y1 y0 at A2 */ \
270 "por %%mm4, %%mm7 \n\t" /* mm7 = y7 y6 y5 y4 (words swapped within each dword) */ \
271 "movq %%mm7, 8+"#A2" \n\t" /* store y7 y6 y5 y4 at 8+A2 */ \
/*
 * DCT_8_INV_ROW_XMM(A1, A2, A3, A4)
 *
 * Same row transform and same operand roles as the plain-MMX row macro,
 * but the input words are rearranged with pshufw (an MMX-register shuffle
 * that is not part of the base MMX instruction set) instead of
 * punpck* sequences.
 *
 *   A1  memory operand of the input row: x0..x3 at A1, x4..x7 at 8+A1.
 *   A2  memory operand of the 16-byte output row.
 *   A3  memory operand of a 64-byte table of 16-bit multipliers; T[n] is
 *       the quadword at byte offset n.  The inputs are paired differently
 *       here (x0 with x2, x4 with x6, x1 with x3, x5 with x7), so the
 *       table must be laid out for this pairing; the visible callers pass
 *       a different table to this macro than to the MMX one.
 *   A4  memory operand of a quadword of two 32-bit values added to both
 *       even-input sums before the shift.
 *
 * Notation: lanes are listed high-to-low.  "(p q) . T[n]" is pmaddwd.
 * a0..a3 are built from x0, x2, x4, x6 plus A4; b0..b3 from x1, x3, x5, x7.
 *
 * Result: y[k] = (a[k] + b[k]) >> 11 and y[7-k] = (a[k] - b[k]) >> 11 for
 * k = 0..3, packed to 16 bits with signed saturation.
 *
 * Overwrites mm0-mm7.
 *
 * NOTE(review): the last line of the macro ends with a continuation
 * backslash, so the line following the macro in the file must be blank or
 * a comment -- confirm against the full file.
 */
278 #define DCT_8_INV_ROW_XMM(A1, A2, A3, A4) \
279 "movq "#A1", %%mm0 \n\t" /* mm0 = x3 x2 x1 x0 */ \
280 "movq 8+"#A1", %%mm1 \n\t" /* mm1 = x7 x6 x5 x4 */ \
281 "movq %%mm0, %%mm2 \n\t" /* mm2 = x3 x2 x1 x0 (copy) */ \
282 "movq "#A3", %%mm3 \n\t" /* mm3 = T[0] */ \
283 "pshufw $0x88, %%mm0, %%mm0 \n\t" /* mm0 = x2 x0 x2 x0 */ \
284 "movq 8+"#A3", %%mm4 \n\t" /* mm4 = T[8] */ \
285 "movq %%mm1, %%mm5 \n\t" /* mm5 = x7 x6 x5 x4 (copy) */ \
286 "pmaddwd %%mm0, %%mm3 \n\t" /* mm3 = (x2 x0) . T[0] */ \
287 "movq 32+"#A3", %%mm6 \n\t" /* mm6 = T[32] */ \
288 "pshufw $0x88, %%mm1, %%mm1 \n\t" /* mm1 = x6 x4 x6 x4 */ \
289 "pmaddwd %%mm1, %%mm4 \n\t" /* mm4 = (x6 x4) . T[8] */ \
290 "movq 40+"#A3", %%mm7 \n\t" /* mm7 = T[40] */ \
291 "pshufw $0xdd, %%mm2, %%mm2 \n\t" /* mm2 = x3 x1 x3 x1 */ \
292 "pmaddwd %%mm2, %%mm6 \n\t" /* mm6 = (x3 x1) . T[32] */ \
293 "pshufw $0xdd, %%mm5, %%mm5 \n\t" /* mm5 = x7 x5 x7 x5 */ \
294 "pmaddwd %%mm5, %%mm7 \n\t" /* mm7 = (x7 x5) . T[40] */ \
295 "paddd "#A4", %%mm3 \n\t" /* add the A4 quadword to the first even-input partial sum */ \
296 "pmaddwd 16+"#A3", %%mm0 \n\t" /* mm0 = (x2 x0) . T[16] */ \
297 "paddd %%mm4, %%mm3 \n\t" /* mm3 = a1 a0 */ \
298 "pmaddwd 24+"#A3", %%mm1 \n\t" /* mm1 = (x6 x4) . T[24] */ \
299 "movq %%mm3, %%mm4 \n\t" /* mm4 = a1 a0 (copy, needed for the a-b half) */ \
300 "pmaddwd 48+"#A3", %%mm2 \n\t" /* mm2 = (x3 x1) . T[48] */ \
301 "paddd %%mm7, %%mm6 \n\t" /* mm6 = b1 b0 */ \
302 "pmaddwd 56+"#A3", %%mm5 \n\t" /* mm5 = (x7 x5) . T[56] */ \
303 "paddd %%mm6, %%mm3 \n\t" /* mm3 = a1+b1 a0+b0 */ \
304 "paddd "#A4", %%mm0 \n\t" /* add the A4 quadword to the second even-input partial sum */ \
305 "psrad $11, %%mm3 \n\t" /* arithmetic >> 11: mm3 = y1 y0 (32-bit) */ \
306 "paddd %%mm1, %%mm0 \n\t" /* mm0 = a3 a2 */ \
307 "psubd %%mm6, %%mm4 \n\t" /* mm4 = a1-b1 a0-b0 */ \
308 "movq %%mm0, %%mm7 \n\t" /* mm7 = a3 a2 (copy) */ \
309 "paddd %%mm5, %%mm2 \n\t" /* mm2 = b3 b2 */ \
310 "paddd %%mm2, %%mm0 \n\t" /* mm0 = a3+b3 a2+b2 */ \
311 "psrad $11, %%mm4 \n\t" /* mm4 = y6 y7 (32-bit) */ \
312 "psubd %%mm2, %%mm7 \n\t" /* mm7 = a3-b3 a2-b2 */ \
313 "psrad $11, %%mm0 \n\t" /* mm0 = y3 y2 (32-bit) */ \
314 "psrad $11, %%mm7 \n\t" /* mm7 = y4 y5 (32-bit) */ \
315 "packssdw %%mm0, %%mm3 \n\t" /* mm3 = y3 y2 y1 y0, saturated to 16 bits */ \
316 "packssdw %%mm4, %%mm7 \n\t" /* mm7 = y6 y7 y4 y5, saturated to 16 bits */ \
317 "movq %%mm3, "#A2" \n\t" /* store y3 y2 y1 y0 at A2 */ \
318 "pshufw $0xb1, %%mm7, %%mm7 \n\t" /* swap words within each dword: mm7 = y7 y6 y5 y4 */ \
319 "movq %%mm7, 8+"#A2" \n\t" /* store y7 y6 y5 y4 at 8+A2 */ \
/*
 * DCT_8_INV_COL(A1, A2)
 *
 * Expands to inline-asm text (AT&T syntax, MMX) for the column pass.  One
 * expansion handles four adjacent columns: every movq moves four 16-bit
 * words, and rows are 16 bytes apart.  The visible callers expand it twice
 * (byte offsets 0 and 8) to cover all eight columns.
 *
 *   A1  memory operand of the input; rN below is the quadword at N*16+A1.
 *   A2  memory operand of the output; outN is stored at N*16+A2.
 *
 * Besides its two parameters the macro hard-codes asm operand %3 as the
 * base of a constant table; c0..c3 below are the quadwords at byte offsets
 * 0, 8, 16 and 24 from %3.  Any asm statement using this macro must bind
 * operand 3 accordingly.
 *
 * Notation: hi(p*q) is pmulhw, the high 16 bits of the signed 16x16
 * product.  All additions and subtractions are saturating (paddsw/psubsw).
 * For c2 the multiplicand is added back after pmulhw, i.e. the code
 * computes hi(r*c2) + r.
 *
 * Rows 3 and 5 of A2 are used as scratch for b0 and b3 in the middle of
 * the computation.  This is safe when A2 aliases A1 (as in the visible
 * callers) because r3 and r5 have already been loaded by then, and every
 * other input row is loaded before its output row is stored.
 *
 * Outputs, each arithmetically shifted right by 6:
 *   out0 = a0+b0   out7 = a0-b0
 *   out1 = a1+b1   out6 = a1-b1
 *   out2 = a2+b2   out5 = a2-b2
 *   out3 = a3+b3   out4 = a3-b3
 *
 * Overwrites mm0-mm7.
 *
 * NOTE(review): the last line of the macro ends with a continuation
 * backslash, so the line following the macro in the file must be blank or
 * a comment -- confirm against the full file.
 */
385 #define DCT_8_INV_COL(A1, A2) \
386 "movq 2*8(%3), %%mm0 \n\t" /* mm0 = c2 */ \
387 "movq 16*3+"#A1", %%mm3 \n\t" /* mm3 = r3 */ \
388 "movq %%mm0, %%mm1 \n\t" /* mm1 = c2 (copy) */ \
389 "movq 16*5+"#A1", %%mm5 \n\t" /* mm5 = r5 */ \
390 "pmulhw %%mm3, %%mm0 \n\t" /* mm0 = hi(r3*c2) */ \
391 "movq (%3), %%mm4 \n\t" /* mm4 = c0 */ \
392 "pmulhw %%mm5, %%mm1 \n\t" /* mm1 = hi(r5*c2) */ \
393 "movq 16*7+"#A1", %%mm7 \n\t" /* mm7 = r7 */ \
394 "movq %%mm4, %%mm2 \n\t" /* mm2 = c0 (copy) */ \
395 "movq 16*1+"#A1", %%mm6 \n\t" /* mm6 = r1 */ \
396 "pmulhw %%mm7, %%mm4 \n\t" /* mm4 = hi(r7*c0) */ \
397 "paddsw %%mm3, %%mm0 \n\t" /* mm0 = hi(r3*c2) + r3 */ \
398 "pmulhw %%mm6, %%mm2 \n\t" /* mm2 = hi(r1*c0) */ \
399 "paddsw %%mm3, %%mm1 \n\t" /* mm1 = hi(r5*c2) + r3 */ \
400 "psubsw %%mm5, %%mm0 \n\t" /* mm0 = hi(r3*c2) + r3 - r5 = tm35 */ \
401 "movq 3*8(%3), %%mm3 \n\t" /* mm3 = c3 (r3 no longer needed) */ \
402 "paddsw %%mm5, %%mm1 \n\t" /* mm1 = hi(r5*c2) + r5 + r3 = tp35 */ \
403 "paddsw %%mm6, %%mm4 \n\t" /* mm4 = r1 + hi(r7*c0) = tp17 */ \
404 "psubsw %%mm7, %%mm2 \n\t" /* mm2 = hi(r1*c0) - r7 = tm17 */ \
405 "movq %%mm4, %%mm5 \n\t" /* mm5 = tp17 */ \
406 "movq %%mm2, %%mm6 \n\t" /* mm6 = tm17 */ \
407 "paddsw %%mm1, %%mm5 \n\t" /* mm5 = tp17 + tp35 = b0 */ \
408 "psubsw %%mm0, %%mm6 \n\t" /* mm6 = tm17 - tm35 = b3 */ \
409 "psubsw %%mm1, %%mm4 \n\t" /* mm4 = tp17 - tp35 = t1 */ \
410 "paddsw %%mm0, %%mm2 \n\t" /* mm2 = tm17 + tm35 = t2 */ \
411 "movq 1*8(%3), %%mm7 \n\t" /* mm7 = c1 */ \
412 "movq %%mm4, %%mm1 \n\t" /* mm1 = t1 */ \
413 "movq %%mm5, 3*16+"#A2" \n\t" /* park b0 in output row 3 (scratch) */ \
414 "paddsw %%mm2, %%mm1 \n\t" /* mm1 = t1 + t2 */ \
415 "movq %%mm6, 5*16+"#A2" \n\t" /* park b3 in output row 5 (scratch) */ \
416 "psubsw %%mm2, %%mm4 \n\t" /* mm4 = t1 - t2 */ \
417 "movq 2*16+"#A1", %%mm5 \n\t" /* mm5 = r2 */ \
418 "movq %%mm7, %%mm0 \n\t" /* mm0 = c1 (copy) */ \
419 "movq 6*16+"#A1", %%mm6 \n\t" /* mm6 = r6 */ \
420 "pmulhw %%mm5, %%mm0 \n\t" /* mm0 = hi(r2*c1) */ \
421 "pmulhw %%mm6, %%mm7 \n\t" /* mm7 = hi(r6*c1) */ \
422 "pmulhw %%mm3, %%mm1 \n\t" /* mm1 = hi((t1+t2)*c3), half of b1 */ \
423 "movq 0*16+"#A1", %%mm2 \n\t" /* mm2 = r0 */ \
424 "pmulhw %%mm3, %%mm4 \n\t" /* mm4 = hi((t1-t2)*c3), half of b2 */ \
425 "psubsw %%mm6, %%mm0 \n\t" /* mm0 = hi(r2*c1) - r6 = tm26 */ \
426 "movq %%mm2, %%mm3 \n\t" /* mm3 = r0 (copy) */ \
427 "movq 4*16+"#A1", %%mm6 \n\t" /* mm6 = r4 */ \
428 "paddsw %%mm5, %%mm7 \n\t" /* mm7 = r2 + hi(r6*c1) = tp26 */ \
429 "paddsw %%mm6, %%mm2 \n\t" /* mm2 = r0 + r4 = tp04 */ \
430 "psubsw %%mm6, %%mm3 \n\t" /* mm3 = r0 - r4 = tm04 */ \
431 "movq %%mm2, %%mm5 \n\t" /* mm5 = tp04 */ \
432 "movq %%mm3, %%mm6 \n\t" /* mm6 = tm04 */ \
433 "psubsw %%mm7, %%mm2 \n\t" /* mm2 = tp04 - tp26 = a3 */ \
434 "paddsw %%mm0, %%mm3 \n\t" /* mm3 = tm04 + tm26 = a1 */ \
435 "paddsw %%mm1, %%mm1 \n\t" /* mm1 doubled = b1 */ \
436 "paddsw %%mm4, %%mm4 \n\t" /* mm4 doubled = b2 */ \
437 "paddsw %%mm7, %%mm5 \n\t" /* mm5 = tp04 + tp26 = a0 */ \
438 "psubsw %%mm0, %%mm6 \n\t" /* mm6 = tm04 - tm26 = a2 */ \
439 "movq %%mm3, %%mm7 \n\t" /* mm7 = a1 */ \
440 "movq %%mm6, %%mm0 \n\t" /* mm0 = a2 */ \
441 "paddsw %%mm1, %%mm3 \n\t" /* mm3 = a1 + b1 */ \
442 "paddsw %%mm4, %%mm6 \n\t" /* mm6 = a2 + b2 */ \
443 "psraw $6, %%mm3 \n\t" /* mm3 = out1 */ \
444 "psubsw %%mm1, %%mm7 \n\t" /* mm7 = a1 - b1 */ \
445 "psraw $6, %%mm6 \n\t" /* mm6 = out2 */ \
446 "psubsw %%mm4, %%mm0 \n\t" /* mm0 = a2 - b2 */ \
447 "movq 3*16+"#A2", %%mm1 \n\t" /* reload b0 from scratch row 3 */ \
448 "psraw $6, %%mm7 \n\t" /* mm7 = out6 */ \
449 "movq %%mm5, %%mm4 \n\t" /* mm4 = a0 */ \
450 "psraw $6, %%mm0 \n\t" /* mm0 = out5 */ \
451 "movq %%mm3, 1*16+"#A2" \n\t" /* store out1 */ \
452 "paddsw %%mm1, %%mm5 \n\t" /* mm5 = a0 + b0 */ \
453 "movq %%mm6, 2*16+"#A2" \n\t" /* store out2 */ \
454 "psubsw %%mm1, %%mm4 \n\t" /* mm4 = a0 - b0 */ \
455 "movq 5*16+"#A2", %%mm3 \n\t" /* reload b3 from scratch row 5 */ \
456 "psraw $6, %%mm5 \n\t" /* mm5 = out0 */ \
457 "movq %%mm2, %%mm6 \n\t" /* mm6 = a3 */ \
458 "psraw $6, %%mm4 \n\t" /* mm4 = out7 */ \
459 "movq %%mm0, 5*16+"#A2" \n\t" /* store out5 (b3 was reloaded just above) */ \
460 "paddsw %%mm3, %%mm2 \n\t" /* mm2 = a3 + b3 */ \
461 "movq %%mm7, 6*16+"#A2" \n\t" /* store out6 */ \
462 "psubsw %%mm3, %%mm6 \n\t" /* mm6 = a3 - b3 */ \
463 "movq %%mm5, 0*16+"#A2" \n\t" /* store out0 */ \
464 "psraw $6, %%mm2 \n\t" /* mm2 = out3 */ \
465 "movq %%mm4, 7*16+"#A2" \n\t" /* store out7 */ \
466 "psraw $6, %%mm6 \n\t" /* mm6 = out4 */ \
467 "movq %%mm2, 3*16+"#A2" \n\t" /* store out3 (overwrites the b0 scratch) */ \
468 "movq %%mm6, 4*16+"#A2" \n\t" /* store out4 */ \
482 DCT_8_INV_ROW_MMX(0 * 16(%0), 0 * 16(%0), 64 * 0(%2), 8 * 0(%1))
483 DCT_8_INV_ROW_MMX(1 * 16(%0), 1 * 16(%0), 64 * 1(%2), 8 * 1(%1))
484 DCT_8_INV_ROW_MMX(2 * 16(%0), 2 * 16(%0), 64 * 2(%2), 8 * 2(%1))
485 DCT_8_INV_ROW_MMX(3 * 16(%0), 3 * 16(%0), 64 * 3(%2), 8 * 3(%1))
486 DCT_8_INV_ROW_MMX(4 * 16(%0), 4 * 16(%0), 64 * 0(%2), 8 * 4(%1))
487 DCT_8_INV_ROW_MMX(5 * 16(%0), 5 * 16(%0), 64 * 3(%2), 8 * 5(%1))
488 DCT_8_INV_ROW_MMX(6 * 16(%0), 6 * 16(%0), 64 * 2(%2), 8 * 6(%1))
489 DCT_8_INV_ROW_MMX(7 * 16(%0), 7 * 16(%0), 64 * 1(%2), 8 * 7(%1))
492 DCT_8_INV_COL(0(%0), 0(%0))
493 DCT_8_INV_COL(8(%0), 8(%0))
494 :: "
r" (block), "
r" (rounder_0), "
r" (tab_i_04_mmx), "
r" (tg_1_16));
511 #if HAVE_MMXEXT_INLINE
521 DCT_8_INV_ROW_XMM(0 * 16(%0), 0 * 16(%0), 64 * 0(%2), 8 * 0(%1))
522 DCT_8_INV_ROW_XMM(1 * 16(%0), 1 * 16(%0), 64 * 1(%2), 8 * 1(%1))
523 DCT_8_INV_ROW_XMM(2 * 16(%0), 2 * 16(%0), 64 * 2(%2), 8 * 2(%1))
524 DCT_8_INV_ROW_XMM(3 * 16(%0), 3 * 16(%0), 64 * 3(%2), 8 * 3(%1))
525 DCT_8_INV_ROW_XMM(4 * 16(%0), 4 * 16(%0), 64 * 0(%2), 8 * 4(%1))
526 DCT_8_INV_ROW_XMM(5 * 16(%0), 5 * 16(%0), 64 * 3(%2), 8 * 5(%1))
527 DCT_8_INV_ROW_XMM(6 * 16(%0), 6 * 16(%0), 64 * 2(%2), 8 * 6(%1))
528 DCT_8_INV_ROW_XMM(7 * 16(%0), 7 * 16(%0), 64 * 1(%2), 8 * 7(%1))
531 DCT_8_INV_COL(0(%0), 0(%0))
532 DCT_8_INV_COL(8(%0), 8(%0))
533 :: "
r" (block), "
r" (rounder_0), "
r" (tab_i_04_xmm), "
r" (tg_1_16));