/*
 * Fixed-point cosine constants: Ci = cos(i*M_PI/16) * sqrt(2) * (1<<14),
 * converted to an integer. Every entry is rounded to nearest (+ 0.5) except
 * C4, which is biased down (- 0.5) and is therefore 16383 rather than 16384.
 */
43 #define C0 23170 // cos(0*M_PI/16)*sqrt(2)*(1<<14) + 0.5
44 #define C1 22725 // cos(1*M_PI/16)*sqrt(2)*(1<<14) + 0.5
45 #define C2 21407 // cos(2*M_PI/16)*sqrt(2)*(1<<14) + 0.5
46 #define C3 19266 // cos(3*M_PI/16)*sqrt(2)*(1<<14) + 0.5
47 #define C4 16383 // cos(4*M_PI/16)*sqrt(2)*(1<<14) - 0.5 (deliberately rounded down)
48 #define C5 12873 // cos(5*M_PI/16)*sqrt(2)*(1<<14) + 0.5
49 #define C6 8867 // cos(6*M_PI/16)*sqrt(2)*(1<<14) + 0.5
50 #define C7 4520 // cos(7*M_PI/16)*sqrt(2)*(1<<14) + 0.5
/*
 * NOTE(review): presumably the right-shift count of the second (column) pass.
 * The IDCT(...) invocations visible in this file pass the literal 20 rather
 * than this name - confirm they are meant to stay in sync. The meaning of the
 * trailing "6" is not evident from this file.
 */
53 #define COL_SHIFT 20 // 6
89 int16_t *
const temp= (int16_t*)align_tmp;
/*
 * DC_COND_IDCT: inline-asm text for one transform step over four input
 * quadwords, with a zero test in front and a short alternative path at the end.
 *
 *   src0, src4, src1, src5  memory operands, loaded as quadwords into
 *                           mm0, mm1, mm2 and mm3 respectively
 *   dst                     memory operand; quadwords are written at
 *                           dst, 8+dst, 16+dst and 24+dst
 *   rounder                 instruction text (mnemonic plus source operand)
 *                           pasted in front of ", %%mm4" and ", %%mm0";
 *                           the invocation in this file passes "paddd 8(%2)"
 *   shift                   immediate count used by every psrad of the
 *                           general path
 *
 * Zero test: (src0 & wm1010) | src4 | src1 | src5 is packed, moved to %eax and
 * "orl %%eax, %%eax" sets the flags.
 *
 * General path: the inputs are multiplied (pmaddwd) by the coefficient
 * quadwords at 16(%2) .. 104(%2), the rounder is applied to mm4 and mm0, the
 * partial sums are combined with paddd/psubd, shifted right by `shift`,
 * saturated to 16 bits (packssdw) and stored.
 *
 * Short path (starting at "pslld $16"): each dword of mm0 is shifted left by
 * 16, d40000 is added, the sum is shifted right arithmetically by 13, packed,
 * and the same quadword is stored to all four dst slots.
 *
 * Uses mm0-mm7 and %eax as scratch.
 *
 * NOTE(review): the embedded listing numbers skip 105, 168-169 and 178-179, so
 * the conditional jump that consumes the flags, the jump/label separating the
 * two paths and the closing line of the macro are not visible here. Restore
 * them from the upstream file before building.
 */
92 #define DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
93 "movq " #src0 ", %%mm0 \n\t" \
94 "movq " #src4 ", %%mm1 \n\t" \
95 "movq " #src1 ", %%mm2 \n\t" \
96 "movq " #src5 ", %%mm3 \n\t" \
97 "movq "MANGLE(wm1010)", %%mm4 \n\t"\
98 "pand %%mm0, %%mm4 \n\t"\
99 "por %%mm1, %%mm4 \n\t"\
100 "por %%mm2, %%mm4 \n\t"\
101 "por %%mm3, %%mm4 \n\t"\
102 "packssdw %%mm4,%%mm4 \n\t"\
103 "movd %%mm4, %%eax \n\t"\
104 "orl %%eax, %%eax \n\t"\
106 "movq 16(%2), %%mm4 \n\t" \
107 "pmaddwd %%mm0, %%mm4 \n\t" \
108 "movq 24(%2), %%mm5 \n\t" \
109 "pmaddwd %%mm5, %%mm0 \n\t" \
110 "movq 32(%2), %%mm5 \n\t" \
111 "pmaddwd %%mm1, %%mm5 \n\t" \
112 "movq 40(%2), %%mm6 \n\t" \
113 "pmaddwd %%mm6, %%mm1 \n\t" \
114 "movq 48(%2), %%mm7 \n\t" \
115 "pmaddwd %%mm2, %%mm7 \n\t" \
116 #rounder ", %%mm4 \n\t"\
117 "movq %%mm4, %%mm6 \n\t" \
118 "paddd %%mm5, %%mm4 \n\t" \
119 "psubd %%mm5, %%mm6 \n\t" \
120 "movq 56(%2), %%mm5 \n\t" \
121 "pmaddwd %%mm3, %%mm5 \n\t" \
122 #rounder ", %%mm0 \n\t"\
123 "paddd %%mm0, %%mm1 \n\t" \
124 "paddd %%mm0, %%mm0 \n\t" \
125 "psubd %%mm1, %%mm0 \n\t" \
126 "pmaddwd 64(%2), %%mm2 \n\t" \
127 "paddd %%mm5, %%mm7 \n\t" \
128 "movq 72(%2), %%mm5 \n\t" \
129 "pmaddwd %%mm3, %%mm5 \n\t" \
130 "paddd %%mm4, %%mm7 \n\t" \
131 "paddd %%mm4, %%mm4 \n\t" \
132 "psubd %%mm7, %%mm4 \n\t" \
133 "paddd %%mm2, %%mm5 \n\t" \
134 "psrad $" #shift ", %%mm7 \n\t"\
135 "psrad $" #shift ", %%mm4 \n\t"\
136 "movq %%mm1, %%mm2 \n\t" \
137 "paddd %%mm5, %%mm1 \n\t" \
138 "psubd %%mm5, %%mm2 \n\t" \
139 "psrad $" #shift ", %%mm1 \n\t"\
140 "psrad $" #shift ", %%mm2 \n\t"\
141 "packssdw %%mm1, %%mm7 \n\t" \
142 "packssdw %%mm4, %%mm2 \n\t" \
143 "movq %%mm7, " #dst " \n\t"\
144 "movq " #src1 ", %%mm1 \n\t" \
145 "movq 80(%2), %%mm4 \n\t" \
146 "movq %%mm2, 24+" #dst " \n\t"\
147 "pmaddwd %%mm1, %%mm4 \n\t" \
148 "movq 88(%2), %%mm7 \n\t" \
149 "pmaddwd 96(%2), %%mm1 \n\t" \
150 "pmaddwd %%mm3, %%mm7 \n\t" \
151 "movq %%mm0, %%mm2 \n\t" \
152 "pmaddwd 104(%2), %%mm3 \n\t" \
153 "paddd %%mm7, %%mm4 \n\t" \
154 "paddd %%mm4, %%mm2 \n\t" \
155 "psubd %%mm4, %%mm0 \n\t" \
156 "psrad $" #shift ", %%mm2 \n\t"\
157 "psrad $" #shift ", %%mm0 \n\t"\
158 "movq %%mm6, %%mm4 \n\t" \
159 "paddd %%mm1, %%mm3 \n\t" \
160 "paddd %%mm3, %%mm6 \n\t" \
161 "psubd %%mm3, %%mm4 \n\t" \
162 "psrad $" #shift ", %%mm6 \n\t"\
163 "packssdw %%mm6, %%mm2 \n\t" \
164 "movq %%mm2, 8+" #dst " \n\t"\
165 "psrad $" #shift ", %%mm4 \n\t"\
166 "packssdw %%mm0, %%mm4 \n\t" \
167 "movq %%mm4, 16+" #dst " \n\t"\
170 "pslld $16, %%mm0 \n\t"\
171 "paddd "MANGLE(d40000)", %%mm0 \n\t"\
172 "psrad $13, %%mm0 \n\t"\
173 "packssdw %%mm0, %%mm0 \n\t"\
174 "movq %%mm0, " #dst " \n\t"\
175 "movq %%mm0, 8+" #dst " \n\t"\
176 "movq %%mm0, 16+" #dst " \n\t"\
177 "movq %%mm0, 24+" #dst " \n\t"\
/*
 * Z_COND_IDCT: inline-asm text for one transform step over four input
 * quadwords, preceded by an all-zero test.
 *
 *   src0, src4, src1, src5  memory operands, loaded as quadwords into
 *                           mm0, mm1, mm2 and mm3 respectively
 *   dst                     memory operand; quadwords are written at
 *                           dst, 8+dst, 16+dst and 24+dst
 *   rounder                 instruction text pasted in front of ", %%mm4" and
 *                           ", %%mm0"; invocations in this file pass
 *                           "paddd (%2)"
 *   shift                   immediate count used by every psrad
 *   bt                      not referenced by any line visible here;
 *                           invocations pass local labels such as 4f, so it
 *                           is presumably the target of the jump taken when
 *                           the test finds only zeros - TODO confirm
 *
 * Zero test: src0 | src4 | src1 | src5 is packed, moved to %eax and
 * "orl %%eax, %%eax" sets the flags. Unlike DC_COND_IDCT, src0 is not masked
 * before the OR.
 *
 * Transform: the inputs are multiplied (pmaddwd) by the coefficient quadwords
 * at 16(%2) .. 104(%2), the rounder is applied to mm4 and mm0, the partial
 * sums are combined with paddd/psubd, shifted right by `shift`, saturated to
 * 16 bits (packssdw) and stored.
 *
 * Uses mm0-mm7 and %eax as scratch.
 *
 * NOTE(review): the embedded listing numbers skip 192 and 255, so the jump
 * that consumes the flags and the closing line of the macro are not visible
 * here. Restore them from the upstream file before building.
 */
180 #define Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift, bt) \
181 "movq " #src0 ", %%mm0 \n\t" \
182 "movq " #src4 ", %%mm1 \n\t" \
183 "movq " #src1 ", %%mm2 \n\t" \
184 "movq " #src5 ", %%mm3 \n\t" \
185 "movq %%mm0, %%mm4 \n\t"\
186 "por %%mm1, %%mm4 \n\t"\
187 "por %%mm2, %%mm4 \n\t"\
188 "por %%mm3, %%mm4 \n\t"\
189 "packssdw %%mm4,%%mm4 \n\t"\
190 "movd %%mm4, %%eax \n\t"\
191 "orl %%eax, %%eax \n\t"\
193 "movq 16(%2), %%mm4 \n\t" \
194 "pmaddwd %%mm0, %%mm4 \n\t" \
195 "movq 24(%2), %%mm5 \n\t" \
196 "pmaddwd %%mm5, %%mm0 \n\t" \
197 "movq 32(%2), %%mm5 \n\t" \
198 "pmaddwd %%mm1, %%mm5 \n\t" \
199 "movq 40(%2), %%mm6 \n\t" \
200 "pmaddwd %%mm6, %%mm1 \n\t" \
201 "movq 48(%2), %%mm7 \n\t" \
202 "pmaddwd %%mm2, %%mm7 \n\t" \
203 #rounder ", %%mm4 \n\t"\
204 "movq %%mm4, %%mm6 \n\t" \
205 "paddd %%mm5, %%mm4 \n\t" \
206 "psubd %%mm5, %%mm6 \n\t" \
207 "movq 56(%2), %%mm5 \n\t" \
208 "pmaddwd %%mm3, %%mm5 \n\t" \
209 #rounder ", %%mm0 \n\t"\
210 "paddd %%mm0, %%mm1 \n\t" \
211 "paddd %%mm0, %%mm0 \n\t" \
212 "psubd %%mm1, %%mm0 \n\t" \
213 "pmaddwd 64(%2), %%mm2 \n\t" \
214 "paddd %%mm5, %%mm7 \n\t" \
215 "movq 72(%2), %%mm5 \n\t" \
216 "pmaddwd %%mm3, %%mm5 \n\t" \
217 "paddd %%mm4, %%mm7 \n\t" \
218 "paddd %%mm4, %%mm4 \n\t" \
219 "psubd %%mm7, %%mm4 \n\t" \
220 "paddd %%mm2, %%mm5 \n\t" \
221 "psrad $" #shift ", %%mm7 \n\t"\
222 "psrad $" #shift ", %%mm4 \n\t"\
223 "movq %%mm1, %%mm2 \n\t" \
224 "paddd %%mm5, %%mm1 \n\t" \
225 "psubd %%mm5, %%mm2 \n\t" \
226 "psrad $" #shift ", %%mm1 \n\t"\
227 "psrad $" #shift ", %%mm2 \n\t"\
228 "packssdw %%mm1, %%mm7 \n\t" \
229 "packssdw %%mm4, %%mm2 \n\t" \
230 "movq %%mm7, " #dst " \n\t"\
231 "movq " #src1 ", %%mm1 \n\t" \
232 "movq 80(%2), %%mm4 \n\t" \
233 "movq %%mm2, 24+" #dst " \n\t"\
234 "pmaddwd %%mm1, %%mm4 \n\t" \
235 "movq 88(%2), %%mm7 \n\t" \
236 "pmaddwd 96(%2), %%mm1 \n\t" \
237 "pmaddwd %%mm3, %%mm7 \n\t" \
238 "movq %%mm0, %%mm2 \n\t" \
239 "pmaddwd 104(%2), %%mm3 \n\t" \
240 "paddd %%mm7, %%mm4 \n\t" \
241 "paddd %%mm4, %%mm2 \n\t" \
242 "psubd %%mm4, %%mm0 \n\t" \
243 "psrad $" #shift ", %%mm2 \n\t"\
244 "psrad $" #shift ", %%mm0 \n\t"\
245 "movq %%mm6, %%mm4 \n\t" \
246 "paddd %%mm1, %%mm3 \n\t" \
247 "paddd %%mm3, %%mm6 \n\t" \
248 "psubd %%mm3, %%mm4 \n\t" \
249 "psrad $" #shift ", %%mm6 \n\t"\
250 "packssdw %%mm6, %%mm2 \n\t" \
251 "movq %%mm2, 8+" #dst " \n\t"\
252 "psrad $" #shift ", %%mm4 \n\t"\
253 "packssdw %%mm0, %%mm4 \n\t" \
254 "movq %%mm4, 16+" #dst " \n\t"\
/*
 * ROW_IDCT: inline-asm text for one unconditional transform step over four
 * input quadwords. The arithmetic is the same instruction sequence as the
 * general path of DC_COND_IDCT / Z_COND_IDCT, without any zero test.
 *
 *   src0, src4, src1, src5  memory operands, loaded as quadwords into
 *                           mm0, mm1, mm2 and mm3 respectively
 *   dst                     memory operand; quadwords are written at
 *                           dst, 8+dst, 16+dst and 24+dst
 *   rounder                 instruction text pasted in front of ", %%mm4" and
 *                           ", %%mm0"
 *   shift                   immediate count used by every psrad
 *
 * The inputs are multiplied (pmaddwd) by the coefficient quadwords at
 * 16(%2) .. 104(%2), the rounder is applied to mm4 and mm0, the partial sums
 * are combined with paddd/psubd, shifted right by `shift`, saturated to
 * 16 bits (packssdw) and stored. Uses mm0-mm7 as scratch.
 *
 * No invocation of this macro is visible in this listing.
 *
 * NOTE(review): the last visible line still ends in a line continuation; the
 * blank line that terminates the macro upstream is not part of this listing.
 */
256 #define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
257 "movq " #src0 ", %%mm0 \n\t" \
258 "movq " #src4 ", %%mm1 \n\t" \
259 "movq " #src1 ", %%mm2 \n\t" \
260 "movq " #src5 ", %%mm3 \n\t" \
261 "movq 16(%2), %%mm4 \n\t" \
262 "pmaddwd %%mm0, %%mm4 \n\t" \
263 "movq 24(%2), %%mm5 \n\t" \
264 "pmaddwd %%mm5, %%mm0 \n\t" \
265 "movq 32(%2), %%mm5 \n\t" \
266 "pmaddwd %%mm1, %%mm5 \n\t" \
267 "movq 40(%2), %%mm6 \n\t" \
268 "pmaddwd %%mm6, %%mm1 \n\t" \
269 "movq 48(%2), %%mm7 \n\t" \
270 "pmaddwd %%mm2, %%mm7 \n\t" \
271 #rounder ", %%mm4 \n\t"\
272 "movq %%mm4, %%mm6 \n\t" \
273 "paddd %%mm5, %%mm4 \n\t" \
274 "psubd %%mm5, %%mm6 \n\t" \
275 "movq 56(%2), %%mm5 \n\t" \
276 "pmaddwd %%mm3, %%mm5 \n\t" \
277 #rounder ", %%mm0 \n\t"\
278 "paddd %%mm0, %%mm1 \n\t" \
279 "paddd %%mm0, %%mm0 \n\t" \
280 "psubd %%mm1, %%mm0 \n\t" \
281 "pmaddwd 64(%2), %%mm2 \n\t" \
282 "paddd %%mm5, %%mm7 \n\t" \
283 "movq 72(%2), %%mm5 \n\t" \
284 "pmaddwd %%mm3, %%mm5 \n\t" \
285 "paddd %%mm4, %%mm7 \n\t" \
286 "paddd %%mm4, %%mm4 \n\t" \
287 "psubd %%mm7, %%mm4 \n\t" \
288 "paddd %%mm2, %%mm5 \n\t" \
289 "psrad $" #shift ", %%mm7 \n\t"\
290 "psrad $" #shift ", %%mm4 \n\t"\
291 "movq %%mm1, %%mm2 \n\t" \
292 "paddd %%mm5, %%mm1 \n\t" \
293 "psubd %%mm5, %%mm2 \n\t" \
294 "psrad $" #shift ", %%mm1 \n\t"\
295 "psrad $" #shift ", %%mm2 \n\t"\
296 "packssdw %%mm1, %%mm7 \n\t" \
297 "packssdw %%mm4, %%mm2 \n\t" \
298 "movq %%mm7, " #dst " \n\t"\
299 "movq " #src1 ", %%mm1 \n\t" \
300 "movq 80(%2), %%mm4 \n\t" \
301 "movq %%mm2, 24+" #dst " \n\t"\
302 "pmaddwd %%mm1, %%mm4 \n\t" \
303 "movq 88(%2), %%mm7 \n\t" \
304 "pmaddwd 96(%2), %%mm1 \n\t" \
305 "pmaddwd %%mm3, %%mm7 \n\t" \
306 "movq %%mm0, %%mm2 \n\t" \
307 "pmaddwd 104(%2), %%mm3 \n\t" \
308 "paddd %%mm7, %%mm4 \n\t" \
309 "paddd %%mm4, %%mm2 \n\t" \
310 "psubd %%mm4, %%mm0 \n\t" \
311 "psrad $" #shift ", %%mm2 \n\t"\
312 "psrad $" #shift ", %%mm0 \n\t"\
313 "movq %%mm6, %%mm4 \n\t" \
314 "paddd %%mm1, %%mm3 \n\t" \
315 "paddd %%mm3, %%mm6 \n\t" \
316 "psubd %%mm3, %%mm4 \n\t" \
317 "psrad $" #shift ", %%mm6 \n\t"\
318 "packssdw %%mm6, %%mm2 \n\t" \
319 "movq %%mm2, 8+" #dst " \n\t"\
320 "psrad $" #shift ", %%mm4 \n\t"\
321 "packssdw %%mm0, %%mm4 \n\t" \
322 "movq %%mm4, 16+" #dst " \n\t"\
325 DC_COND_IDCT( 0(%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11)
326 Z_COND_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11, 4f)
327 Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 2f)
328 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 1f)
/*
 * IDCT (variant reading all four inputs): inline-asm text for one transform
 * step that produces eight outputs.
 *
 *   src0, src4, src1, src5  memory operands, loaded as quadwords into
 *                           mm0, mm1, mm2 and mm3 (src1 is loaded a second
 *                           time, into mm0, for the second half)
 *   dst                     memory operand; a 32-bit value is written with
 *                           movd at dst + 0, 16, 32, 48, 64, 80, 96 and 112
 *   shift                   immediate count used by every psrad; the
 *                           invocations that follow pass 20
 *
 * The inputs are multiplied (pmaddwd) by the coefficient quadwords at
 * 16(%2) .. 104(%2) and combined with paddd/psubd. No rounding constant is
 * added here. Each result is shifted right by `shift`, packed against itself
 * (packssdw) and its low 32 bits - two saturated 16-bit values - are stored.
 * Stores are issued in the order 0, 16, 96, 112, then 32, 48, 64, 80.
 *
 * Uses mm0-mm7 as scratch.
 *
 * NOTE(review): IDCT is defined several times in this file; the #undef lines
 * and the labels that select between the variants are not visible in this
 * listing.
 */
331 #define IDCT(src0, src4, src1, src5, dst, shift) \
332 "movq " #src0 ", %%mm0 \n\t" \
333 "movq " #src4 ", %%mm1 \n\t" \
334 "movq " #src1 ", %%mm2 \n\t" \
335 "movq " #src5 ", %%mm3 \n\t" \
336 "movq 16(%2), %%mm4 \n\t" \
337 "pmaddwd %%mm0, %%mm4 \n\t" \
338 "movq 24(%2), %%mm5 \n\t" \
339 "pmaddwd %%mm5, %%mm0 \n\t" \
340 "movq 32(%2), %%mm5 \n\t" \
341 "pmaddwd %%mm1, %%mm5 \n\t" \
342 "movq 40(%2), %%mm6 \n\t" \
343 "pmaddwd %%mm6, %%mm1 \n\t" \
344 "movq %%mm4, %%mm6 \n\t" \
345 "movq 48(%2), %%mm7 \n\t" \
346 "pmaddwd %%mm2, %%mm7 \n\t" \
347 "paddd %%mm5, %%mm4 \n\t" \
348 "psubd %%mm5, %%mm6 \n\t" \
349 "movq %%mm0, %%mm5 \n\t" \
350 "paddd %%mm1, %%mm0 \n\t" \
351 "psubd %%mm1, %%mm5 \n\t" \
352 "movq 56(%2), %%mm1 \n\t" \
353 "pmaddwd %%mm3, %%mm1 \n\t" \
354 "pmaddwd 64(%2), %%mm2 \n\t" \
355 "paddd %%mm1, %%mm7 \n\t" \
356 "movq 72(%2), %%mm1 \n\t" \
357 "pmaddwd %%mm3, %%mm1 \n\t" \
358 "paddd %%mm4, %%mm7 \n\t" \
359 "paddd %%mm4, %%mm4 \n\t" \
360 "psubd %%mm7, %%mm4 \n\t" \
361 "paddd %%mm2, %%mm1 \n\t" \
362 "psrad $" #shift ", %%mm7 \n\t"\
363 "psrad $" #shift ", %%mm4 \n\t"\
364 "movq %%mm0, %%mm2 \n\t" \
365 "paddd %%mm1, %%mm0 \n\t" \
366 "psubd %%mm1, %%mm2 \n\t" \
367 "psrad $" #shift ", %%mm0 \n\t"\
368 "psrad $" #shift ", %%mm2 \n\t"\
369 "packssdw %%mm7, %%mm7 \n\t" \
370 "movd %%mm7, " #dst " \n\t"\
371 "packssdw %%mm0, %%mm0 \n\t" \
372 "movd %%mm0, 16+" #dst " \n\t"\
373 "packssdw %%mm2, %%mm2 \n\t" \
374 "movd %%mm2, 96+" #dst " \n\t"\
375 "packssdw %%mm4, %%mm4 \n\t" \
376 "movd %%mm4, 112+" #dst " \n\t"\
377 "movq " #src1 ", %%mm0 \n\t" \
378 "movq 80(%2), %%mm4 \n\t" \
379 "pmaddwd %%mm0, %%mm4 \n\t" \
380 "movq 88(%2), %%mm7 \n\t" \
381 "pmaddwd 96(%2), %%mm0 \n\t" \
382 "pmaddwd %%mm3, %%mm7 \n\t" \
383 "movq %%mm5, %%mm2 \n\t" \
384 "pmaddwd 104(%2), %%mm3 \n\t" \
385 "paddd %%mm7, %%mm4 \n\t" \
386 "paddd %%mm4, %%mm2 \n\t" \
387 "psubd %%mm4, %%mm5 \n\t" \
388 "psrad $" #shift ", %%mm2 \n\t"\
389 "psrad $" #shift ", %%mm5 \n\t"\
390 "movq %%mm6, %%mm4 \n\t" \
391 "paddd %%mm0, %%mm3 \n\t" \
392 "paddd %%mm3, %%mm6 \n\t" \
393 "psubd %%mm3, %%mm4 \n\t" \
394 "psrad $" #shift ", %%mm6 \n\t"\
395 "psrad $" #shift ", %%mm4 \n\t"\
396 "packssdw %%mm2, %%mm2 \n\t" \
397 "packssdw %%mm6, %%mm6 \n\t" \
398 "movd %%mm2, 32+" #dst " \n\t"\
399 "packssdw %%mm4, %%mm4 \n\t" \
400 "packssdw %%mm5, %%mm5 \n\t" \
401 "movd %%mm6, 48+" #dst " \n\t"\
402 "movd %%mm4, 64+" #dst " \n\t"\
403 "movd %%mm5, 80+" #dst " \n\t"
407 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
408 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
409 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
410 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
415 Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 6f)
416 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 5f)
/*
 * IDCT (variant reading src0, src4 and src5): inline-asm text for one
 * transform step that produces eight outputs. The src1 parameter is accepted
 * but never referenced, and the coefficient quadwords that the four-input
 * variant multiplies with src1 (48, 64, 80 and 96(%2)) are not used.
 * NOTE(review): presumably selected when the src1 rows are known to be zero;
 * the dispatching labels are not visible in this listing.
 *
 *   src0, src4, src5  memory operands, loaded as quadwords into mm0, mm1, mm3
 *   src1              unused
 *   dst               memory operand; a 32-bit value is written with movd at
 *                     dst + 0, 16, 32, 48, 64, 80, 96 and 112
 *   shift             immediate count used by every psrad; the invocations
 *                     that follow pass 20
 *
 * Products are formed with pmaddwd against 16, 24, 32, 40, 56, 72, 88 and
 * 104(%2), combined with paddd/psubd, shifted right by `shift`, packed
 * against themselves (packssdw) and stored. Uses mm0-mm7 as scratch.
 */
419 #define IDCT(src0, src4, src1, src5, dst, shift) \
420 "movq " #src0 ", %%mm0 \n\t" \
421 "movq " #src4 ", %%mm1 \n\t" \
422 "movq " #src5 ", %%mm3 \n\t" \
423 "movq 16(%2), %%mm4 \n\t" \
424 "pmaddwd %%mm0, %%mm4 \n\t" \
425 "movq 24(%2), %%mm5 \n\t" \
426 "pmaddwd %%mm5, %%mm0 \n\t" \
427 "movq 32(%2), %%mm5 \n\t" \
428 "pmaddwd %%mm1, %%mm5 \n\t" \
429 "movq 40(%2), %%mm6 \n\t" \
430 "pmaddwd %%mm6, %%mm1 \n\t" \
431 "movq %%mm4, %%mm6 \n\t" \
432 "paddd %%mm5, %%mm4 \n\t" \
433 "psubd %%mm5, %%mm6 \n\t" \
434 "movq %%mm0, %%mm5 \n\t" \
435 "paddd %%mm1, %%mm0 \n\t" \
436 "psubd %%mm1, %%mm5 \n\t" \
437 "movq 56(%2), %%mm1 \n\t" \
438 "pmaddwd %%mm3, %%mm1 \n\t" \
439 "movq 72(%2), %%mm7 \n\t" \
440 "pmaddwd %%mm3, %%mm7 \n\t" \
441 "paddd %%mm4, %%mm1 \n\t" \
442 "paddd %%mm4, %%mm4 \n\t" \
443 "psubd %%mm1, %%mm4 \n\t" \
444 "psrad $" #shift ", %%mm1 \n\t"\
445 "psrad $" #shift ", %%mm4 \n\t"\
446 "movq %%mm0, %%mm2 \n\t" \
447 "paddd %%mm7, %%mm0 \n\t" \
448 "psubd %%mm7, %%mm2 \n\t" \
449 "psrad $" #shift ", %%mm0 \n\t"\
450 "psrad $" #shift ", %%mm2 \n\t"\
451 "packssdw %%mm1, %%mm1 \n\t" \
452 "movd %%mm1, " #dst " \n\t"\
453 "packssdw %%mm0, %%mm0 \n\t" \
454 "movd %%mm0, 16+" #dst " \n\t"\
455 "packssdw %%mm2, %%mm2 \n\t" \
456 "movd %%mm2, 96+" #dst " \n\t"\
457 "packssdw %%mm4, %%mm4 \n\t" \
458 "movd %%mm4, 112+" #dst " \n\t"\
459 "movq 88(%2), %%mm1 \n\t" \
460 "pmaddwd %%mm3, %%mm1 \n\t" \
461 "movq %%mm5, %%mm2 \n\t" \
462 "pmaddwd 104(%2), %%mm3 \n\t" \
463 "paddd %%mm1, %%mm2 \n\t" \
464 "psubd %%mm1, %%mm5 \n\t" \
465 "psrad $" #shift ", %%mm2 \n\t"\
466 "psrad $" #shift ", %%mm5 \n\t"\
467 "movq %%mm6, %%mm1 \n\t" \
468 "paddd %%mm3, %%mm6 \n\t" \
469 "psubd %%mm3, %%mm1 \n\t" \
470 "psrad $" #shift ", %%mm6 \n\t"\
471 "psrad $" #shift ", %%mm1 \n\t"\
472 "packssdw %%mm2, %%mm2 \n\t" \
473 "packssdw %%mm6, %%mm6 \n\t" \
474 "movd %%mm2, 32+" #dst " \n\t"\
475 "packssdw %%mm1, %%mm1 \n\t" \
476 "packssdw %%mm5, %%mm5 \n\t" \
477 "movd %%mm6, 48+" #dst " \n\t"\
478 "movd %%mm1, 64+" #dst " \n\t"\
479 "movd %%mm5, 80+" #dst " \n\t"
482 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
483 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
484 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
485 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
490 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 7f)
/*
 * IDCT (variant reading src0 and src5 only): inline-asm text for one
 * transform step that produces eight outputs. The src4 and src1 parameters
 * are accepted but never referenced.
 * NOTE(review): presumably selected when the src4 and src1 rows are known to
 * be zero; the dispatching labels are not visible in this listing.
 *
 *   src0, src5   memory operands, loaded as quadwords into mm0 and mm3
 *   src4, src1   unused
 *   dst          memory operand; a 32-bit value is written with movd at
 *                dst + 0, 16, 32, 48, 64, 80, 96 and 112
 *   shift        immediate count used by every psrad; the invocations that
 *                follow pass 20
 *
 * Products are formed with pmaddwd against 16, 24, 56, 72, 88 and 104(%2),
 * combined with paddd/psubd, shifted right by `shift`, packed against
 * themselves (packssdw) and stored. Uses mm0-mm7 as scratch.
 */
493 #define IDCT(src0, src4, src1, src5, dst, shift) \
494 "movq " #src0 ", %%mm0 \n\t" \
495 "movq " #src5 ", %%mm3 \n\t" \
496 "movq 16(%2), %%mm4 \n\t" \
497 "pmaddwd %%mm0, %%mm4 \n\t" \
498 "movq 24(%2), %%mm5 \n\t" \
499 "pmaddwd %%mm5, %%mm0 \n\t" \
500 "movq %%mm4, %%mm6 \n\t" \
501 "movq %%mm0, %%mm5 \n\t" \
502 "movq 56(%2), %%mm1 \n\t" \
503 "pmaddwd %%mm3, %%mm1 \n\t" \
504 "movq 72(%2), %%mm7 \n\t" \
505 "pmaddwd %%mm3, %%mm7 \n\t" \
506 "paddd %%mm4, %%mm1 \n\t" \
507 "paddd %%mm4, %%mm4 \n\t" \
508 "psubd %%mm1, %%mm4 \n\t" \
509 "psrad $" #shift ", %%mm1 \n\t"\
510 "psrad $" #shift ", %%mm4 \n\t"\
511 "movq %%mm0, %%mm2 \n\t" \
512 "paddd %%mm7, %%mm0 \n\t" \
513 "psubd %%mm7, %%mm2 \n\t" \
514 "psrad $" #shift ", %%mm0 \n\t"\
515 "psrad $" #shift ", %%mm2 \n\t"\
516 "packssdw %%mm1, %%mm1 \n\t" \
517 "movd %%mm1, " #dst " \n\t"\
518 "packssdw %%mm0, %%mm0 \n\t" \
519 "movd %%mm0, 16+" #dst " \n\t"\
520 "packssdw %%mm2, %%mm2 \n\t" \
521 "movd %%mm2, 96+" #dst " \n\t"\
522 "packssdw %%mm4, %%mm4 \n\t" \
523 "movd %%mm4, 112+" #dst " \n\t"\
524 "movq 88(%2), %%mm1 \n\t" \
525 "pmaddwd %%mm3, %%mm1 \n\t" \
526 "movq %%mm5, %%mm2 \n\t" \
527 "pmaddwd 104(%2), %%mm3 \n\t" \
528 "paddd %%mm1, %%mm2 \n\t" \
529 "psubd %%mm1, %%mm5 \n\t" \
530 "psrad $" #shift ", %%mm2 \n\t"\
531 "psrad $" #shift ", %%mm5 \n\t"\
532 "movq %%mm6, %%mm1 \n\t" \
533 "paddd %%mm3, %%mm6 \n\t" \
534 "psubd %%mm3, %%mm1 \n\t" \
535 "psrad $" #shift ", %%mm6 \n\t"\
536 "psrad $" #shift ", %%mm1 \n\t"\
537 "packssdw %%mm2, %%mm2 \n\t" \
538 "packssdw %%mm6, %%mm6 \n\t" \
539 "movd %%mm2, 32+" #dst " \n\t"\
540 "packssdw %%mm1, %%mm1 \n\t" \
541 "packssdw %%mm5, %%mm5 \n\t" \
542 "movd %%mm6, 48+" #dst " \n\t"\
543 "movd %%mm1, 64+" #dst " \n\t"\
544 "movd %%mm5, 80+" #dst " \n\t"
548 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
549 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
550 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
551 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
556 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 3f)
/*
 * IDCT (variant reading src0, src1 and src5): inline-asm text for one
 * transform step that produces eight outputs. The src4 parameter is accepted
 * but never referenced, and the coefficient quadwords that the four-input
 * variant multiplies with src4 (32 and 40(%2)) are not used.
 * NOTE(review): presumably selected when the src4 rows are known to be zero;
 * the dispatching labels are not visible in this listing.
 *
 *   src0, src1, src5  memory operands, loaded as quadwords into mm0, mm2, mm3
 *                     (src1 is loaded a second time, into mm0, for the
 *                     second half)
 *   src4              unused
 *   dst               memory operand; a 32-bit value is written with movd at
 *                     dst + 0, 16, 32, 48, 64, 80, 96 and 112
 *   shift             immediate count used by every psrad; the invocations
 *                     that follow pass 20
 *
 * Products are formed with pmaddwd against 16, 24, 48, 56, 64, 72, 80, 88, 96
 * and 104(%2), combined with paddd/psubd, shifted right by `shift`, packed
 * against themselves (packssdw) and stored. Uses mm0-mm7 as scratch.
 */
559 #define IDCT(src0, src4, src1, src5, dst, shift) \
560 "movq " #src0 ", %%mm0 \n\t" \
561 "movq " #src1 ", %%mm2 \n\t" \
562 "movq " #src5 ", %%mm3 \n\t" \
563 "movq 16(%2), %%mm4 \n\t" \
564 "pmaddwd %%mm0, %%mm4 \n\t" \
565 "movq 24(%2), %%mm5 \n\t" \
566 "pmaddwd %%mm5, %%mm0 \n\t" \
567 "movq %%mm4, %%mm6 \n\t" \
568 "movq 48(%2), %%mm7 \n\t" \
569 "pmaddwd %%mm2, %%mm7 \n\t" \
570 "movq %%mm0, %%mm5 \n\t" \
571 "movq 56(%2), %%mm1 \n\t" \
572 "pmaddwd %%mm3, %%mm1 \n\t" \
573 "pmaddwd 64(%2), %%mm2 \n\t" \
574 "paddd %%mm1, %%mm7 \n\t" \
575 "movq 72(%2), %%mm1 \n\t" \
576 "pmaddwd %%mm3, %%mm1 \n\t" \
577 "paddd %%mm4, %%mm7 \n\t" \
578 "paddd %%mm4, %%mm4 \n\t" \
579 "psubd %%mm7, %%mm4 \n\t" \
580 "paddd %%mm2, %%mm1 \n\t" \
581 "psrad $" #shift ", %%mm7 \n\t"\
582 "psrad $" #shift ", %%mm4 \n\t"\
583 "movq %%mm0, %%mm2 \n\t" \
584 "paddd %%mm1, %%mm0 \n\t" \
585 "psubd %%mm1, %%mm2 \n\t" \
586 "psrad $" #shift ", %%mm0 \n\t"\
587 "psrad $" #shift ", %%mm2 \n\t"\
588 "packssdw %%mm7, %%mm7 \n\t" \
589 "movd %%mm7, " #dst " \n\t"\
590 "packssdw %%mm0, %%mm0 \n\t" \
591 "movd %%mm0, 16+" #dst " \n\t"\
592 "packssdw %%mm2, %%mm2 \n\t" \
593 "movd %%mm2, 96+" #dst " \n\t"\
594 "packssdw %%mm4, %%mm4 \n\t" \
595 "movd %%mm4, 112+" #dst " \n\t"\
596 "movq " #src1 ", %%mm0 \n\t" \
597 "movq 80(%2), %%mm4 \n\t" \
598 "pmaddwd %%mm0, %%mm4 \n\t" \
599 "movq 88(%2), %%mm7 \n\t" \
600 "pmaddwd 96(%2), %%mm0 \n\t" \
601 "pmaddwd %%mm3, %%mm7 \n\t" \
602 "movq %%mm5, %%mm2 \n\t" \
603 "pmaddwd 104(%2), %%mm3 \n\t" \
604 "paddd %%mm7, %%mm4 \n\t" \
605 "paddd %%mm4, %%mm2 \n\t" \
606 "psubd %%mm4, %%mm5 \n\t" \
607 "psrad $" #shift ", %%mm2 \n\t"\
608 "psrad $" #shift ", %%mm5 \n\t"\
609 "movq %%mm6, %%mm4 \n\t" \
610 "paddd %%mm0, %%mm3 \n\t" \
611 "paddd %%mm3, %%mm6 \n\t" \
612 "psubd %%mm3, %%mm4 \n\t" \
613 "psrad $" #shift ", %%mm6 \n\t"\
614 "psrad $" #shift ", %%mm4 \n\t"\
615 "packssdw %%mm2, %%mm2 \n\t" \
616 "packssdw %%mm6, %%mm6 \n\t" \
617 "movd %%mm2, 32+" #dst " \n\t"\
618 "packssdw %%mm4, %%mm4 \n\t" \
619 "packssdw %%mm5, %%mm5 \n\t" \
620 "movd %%mm6, 48+" #dst " \n\t"\
621 "movd %%mm4, 64+" #dst " \n\t"\
622 "movd %%mm5, 80+" #dst " \n\t"
625 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
626 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
627 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
628 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
/*
 * IDCT (variant reading src0 and src1 only): inline-asm text for one
 * transform step that produces eight outputs. The src4 and src5 parameters
 * are accepted but never referenced.
 * NOTE(review): presumably selected when the src4 and src5 rows are known to
 * be zero; the dispatching labels are not visible in this listing.
 *
 *   src0, src1   memory operands, loaded as quadwords into mm0 and mm2
 *   src4, src5   unused
 *   dst          memory operand; a 32-bit value is written with movd at
 *                dst + 0, 16, 32, 48, 64, 80, 96 and 112
 *   shift        immediate count used by every psrad; the invocations that
 *                follow pass 20
 *
 * Products are formed with pmaddwd against 16, 24, 48, 64, 80 and 96(%2),
 * combined with paddd/psubd, shifted right by `shift`, packed against
 * themselves (packssdw) and stored. Uses mm0-mm7 as scratch.
 */
634 #define IDCT(src0, src4, src1, src5, dst, shift) \
635 "movq " #src0 ", %%mm0 \n\t" \
636 "movq " #src1 ", %%mm2 \n\t" \
637 "movq 16(%2), %%mm4 \n\t" \
638 "pmaddwd %%mm0, %%mm4 \n\t" \
639 "movq 24(%2), %%mm5 \n\t" \
640 "pmaddwd %%mm5, %%mm0 \n\t" \
641 "movq %%mm4, %%mm6 \n\t" \
642 "movq 48(%2), %%mm7 \n\t" \
643 "pmaddwd %%mm2, %%mm7 \n\t" \
644 "movq %%mm0, %%mm5 \n\t" \
645 "movq 64(%2), %%mm3 \n\t"\
646 "pmaddwd %%mm2, %%mm3 \n\t" \
647 "paddd %%mm4, %%mm7 \n\t" \
648 "paddd %%mm4, %%mm4 \n\t" \
649 "psubd %%mm7, %%mm4 \n\t" \
650 "psrad $" #shift ", %%mm7 \n\t"\
651 "psrad $" #shift ", %%mm4 \n\t"\
652 "movq %%mm0, %%mm1 \n\t" \
653 "paddd %%mm3, %%mm0 \n\t" \
654 "psubd %%mm3, %%mm1 \n\t" \
655 "psrad $" #shift ", %%mm0 \n\t"\
656 "psrad $" #shift ", %%mm1 \n\t"\
657 "packssdw %%mm7, %%mm7 \n\t" \
658 "movd %%mm7, " #dst " \n\t"\
659 "packssdw %%mm0, %%mm0 \n\t" \
660 "movd %%mm0, 16+" #dst " \n\t"\
661 "packssdw %%mm1, %%mm1 \n\t" \
662 "movd %%mm1, 96+" #dst " \n\t"\
663 "packssdw %%mm4, %%mm4 \n\t" \
664 "movd %%mm4, 112+" #dst " \n\t"\
665 "movq 80(%2), %%mm4 \n\t" \
666 "pmaddwd %%mm2, %%mm4 \n\t" \
667 "pmaddwd 96(%2), %%mm2 \n\t" \
668 "movq %%mm5, %%mm1 \n\t" \
669 "paddd %%mm4, %%mm1 \n\t" \
670 "psubd %%mm4, %%mm5 \n\t" \
671 "psrad $" #shift ", %%mm1 \n\t"\
672 "psrad $" #shift ", %%mm5 \n\t"\
673 "movq %%mm6, %%mm4 \n\t" \
674 "paddd %%mm2, %%mm6 \n\t" \
675 "psubd %%mm2, %%mm4 \n\t" \
676 "psrad $" #shift ", %%mm6 \n\t"\
677 "psrad $" #shift ", %%mm4 \n\t"\
678 "packssdw %%mm1, %%mm1 \n\t" \
679 "packssdw %%mm6, %%mm6 \n\t" \
680 "movd %%mm1, 32+" #dst " \n\t"\
681 "packssdw %%mm4, %%mm4 \n\t" \
682 "packssdw %%mm5, %%mm5 \n\t" \
683 "movd %%mm6, 48+" #dst " \n\t"\
684 "movd %%mm4, 64+" #dst " \n\t"\
685 "movd %%mm5, 80+" #dst " \n\t"
689 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
690 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
691 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
692 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
/*
 * IDCT (variant reading src0 and src4 only, two quadwords per invocation):
 * inline-asm text that handles the quadwords at src0/src4 and at
 * 8+src0/8+src4 together and writes full 64-bit results. The src1 and src5
 * parameters are accepted but never referenced.
 * NOTE(review): presumably selected when the src1 and src5 rows are known to
 * be zero; the dispatching labels are not visible in this listing.
 *
 *   src0, src4   memory operands; src0 and src4 are loaded into mm0 and mm1,
 *                8+src0 and 8+src4 into mm2 and mm3
 *   src1, src5   unused
 *   dst          memory operand; quadwords are written with movq at
 *                dst + 0, 16, 32, 48, 64, 80, 96 and 112
 *   shift        immediate count used by every psrad; the invocations that
 *                follow pass 20
 *
 * Products are formed with pmaddwd against 16, 24, 32 and 40(%2) only. Four
 * distinct result quadwords are produced and each is stored twice, so the
 * output is mirrored: 0 == 112, 16 == 96, 32 == 80 and 48 == 64.
 *
 * Because each invocation covers 16 bytes of input, this variant is invoked
 * twice (dst 0(%0) and 8(%0)) instead of four times. Uses mm0-mm7 as scratch.
 */
698 #define IDCT(src0, src4, src1, src5, dst, shift) \
699 "movq " #src0 ", %%mm0 \n\t" \
700 "movq " #src4 ", %%mm1 \n\t" \
701 "movq 16(%2), %%mm4 \n\t" \
702 "pmaddwd %%mm0, %%mm4 \n\t" \
703 "movq 24(%2), %%mm5 \n\t" \
704 "pmaddwd %%mm5, %%mm0 \n\t" \
705 "movq 32(%2), %%mm5 \n\t" \
706 "pmaddwd %%mm1, %%mm5 \n\t" \
707 "movq 40(%2), %%mm6 \n\t" \
708 "pmaddwd %%mm6, %%mm1 \n\t" \
709 "movq %%mm4, %%mm6 \n\t" \
710 "paddd %%mm5, %%mm4 \n\t" \
711 "psubd %%mm5, %%mm6 \n\t" \
712 "movq %%mm0, %%mm5 \n\t" \
713 "paddd %%mm1, %%mm0 \n\t" \
714 "psubd %%mm1, %%mm5 \n\t" \
715 "movq 8+" #src0 ", %%mm2 \n\t" \
716 "movq 8+" #src4 ", %%mm3 \n\t" \
717 "movq 16(%2), %%mm1 \n\t" \
718 "pmaddwd %%mm2, %%mm1 \n\t" \
719 "movq 24(%2), %%mm7 \n\t" \
720 "pmaddwd %%mm7, %%mm2 \n\t" \
721 "movq 32(%2), %%mm7 \n\t" \
722 "pmaddwd %%mm3, %%mm7 \n\t" \
723 "pmaddwd 40(%2), %%mm3 \n\t" \
724 "paddd %%mm1, %%mm7 \n\t" \
725 "paddd %%mm1, %%mm1 \n\t" \
726 "psubd %%mm7, %%mm1 \n\t" \
727 "paddd %%mm2, %%mm3 \n\t" \
728 "paddd %%mm2, %%mm2 \n\t" \
729 "psubd %%mm3, %%mm2 \n\t" \
730 "psrad $" #shift ", %%mm4 \n\t"\
731 "psrad $" #shift ", %%mm7 \n\t"\
732 "psrad $" #shift ", %%mm3 \n\t"\
733 "packssdw %%mm7, %%mm4 \n\t" \
734 "movq %%mm4, " #dst " \n\t"\
735 "psrad $" #shift ", %%mm0 \n\t"\
736 "packssdw %%mm3, %%mm0 \n\t" \
737 "movq %%mm0, 16+" #dst " \n\t"\
738 "movq %%mm0, 96+" #dst " \n\t"\
739 "movq %%mm4, 112+" #dst " \n\t"\
740 "psrad $" #shift ", %%mm5 \n\t"\
741 "psrad $" #shift ", %%mm6 \n\t"\
742 "psrad $" #shift ", %%mm2 \n\t"\
743 "packssdw %%mm2, %%mm5 \n\t" \
744 "movq %%mm5, 32+" #dst " \n\t"\
745 "psrad $" #shift ", %%mm1 \n\t"\
746 "packssdw %%mm1, %%mm6 \n\t" \
747 "movq %%mm6, 48+" #dst " \n\t"\
748 "movq %%mm6, 64+" #dst " \n\t"\
749 "movq %%mm5, 80+" #dst " \n\t"
753 IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
755 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
/*
 * IDCT (variant reading src0, src4 and src1): inline-asm text for one
 * transform step that produces eight outputs. The src5 parameter is accepted
 * but never referenced, and the coefficient quadwords that the four-input
 * variant multiplies with src5 (56, 72, 88 and 104(%2)) are not used.
 * NOTE(review): presumably selected when the src5 rows are known to be zero;
 * the dispatching labels are not visible in this listing.
 *
 *   src0, src4, src1  memory operands, loaded as quadwords into mm0, mm1, mm2
 *   src5              unused
 *   dst               memory operand; a 32-bit value is written with movd at
 *                     dst + 0, 16, 32, 48, 64, 80, 96 and 112
 *   shift             immediate count used by every psrad; the invocations
 *                     that follow pass 20
 *
 * Products are formed with pmaddwd against 16, 24, 32, 40, 48, 64, 80 and
 * 96(%2), combined with paddd/psubd, shifted right by `shift`, packed against
 * themselves (packssdw) and stored. Uses mm0-mm7 as scratch.
 */
763 #define IDCT(src0, src4, src1, src5, dst, shift) \
764 "movq " #src0 ", %%mm0 \n\t" \
765 "movq " #src4 ", %%mm1 \n\t" \
766 "movq " #src1 ", %%mm2 \n\t" \
767 "movq 16(%2), %%mm4 \n\t" \
768 "pmaddwd %%mm0, %%mm4 \n\t" \
769 "movq 24(%2), %%mm5 \n\t" \
770 "pmaddwd %%mm5, %%mm0 \n\t" \
771 "movq 32(%2), %%mm5 \n\t" \
772 "pmaddwd %%mm1, %%mm5 \n\t" \
773 "movq 40(%2), %%mm6 \n\t" \
774 "pmaddwd %%mm6, %%mm1 \n\t" \
775 "movq %%mm4, %%mm6 \n\t" \
776 "movq 48(%2), %%mm7 \n\t" \
777 "pmaddwd %%mm2, %%mm7 \n\t" \
778 "paddd %%mm5, %%mm4 \n\t" \
779 "psubd %%mm5, %%mm6 \n\t" \
780 "movq %%mm0, %%mm5 \n\t" \
781 "paddd %%mm1, %%mm0 \n\t" \
782 "psubd %%mm1, %%mm5 \n\t" \
783 "movq 64(%2), %%mm1 \n\t"\
784 "pmaddwd %%mm2, %%mm1 \n\t" \
785 "paddd %%mm4, %%mm7 \n\t" \
786 "paddd %%mm4, %%mm4 \n\t" \
787 "psubd %%mm7, %%mm4 \n\t" \
788 "psrad $" #shift ", %%mm7 \n\t"\
789 "psrad $" #shift ", %%mm4 \n\t"\
790 "movq %%mm0, %%mm3 \n\t" \
791 "paddd %%mm1, %%mm0 \n\t" \
792 "psubd %%mm1, %%mm3 \n\t" \
793 "psrad $" #shift ", %%mm0 \n\t"\
794 "psrad $" #shift ", %%mm3 \n\t"\
795 "packssdw %%mm7, %%mm7 \n\t" \
796 "movd %%mm7, " #dst " \n\t"\
797 "packssdw %%mm0, %%mm0 \n\t" \
798 "movd %%mm0, 16+" #dst " \n\t"\
799 "packssdw %%mm3, %%mm3 \n\t" \
800 "movd %%mm3, 96+" #dst " \n\t"\
801 "packssdw %%mm4, %%mm4 \n\t" \
802 "movd %%mm4, 112+" #dst " \n\t"\
803 "movq 80(%2), %%mm4 \n\t" \
804 "pmaddwd %%mm2, %%mm4 \n\t" \
805 "pmaddwd 96(%2), %%mm2 \n\t" \
806 "movq %%mm5, %%mm3 \n\t" \
807 "paddd %%mm4, %%mm3 \n\t" \
808 "psubd %%mm4, %%mm5 \n\t" \
809 "psrad $" #shift ", %%mm3 \n\t"\
810 "psrad $" #shift ", %%mm5 \n\t"\
811 "movq %%mm6, %%mm4 \n\t" \
812 "paddd %%mm2, %%mm6 \n\t" \
813 "psubd %%mm2, %%mm4 \n\t" \
814 "psrad $" #shift ", %%mm6 \n\t"\
815 "packssdw %%mm3, %%mm3 \n\t" \
816 "movd %%mm3, 32+" #dst " \n\t"\
817 "psrad $" #shift ", %%mm4 \n\t"\
818 "packssdw %%mm6, %%mm6 \n\t" \
819 "movd %%mm6, 48+" #dst " \n\t"\
820 "packssdw %%mm4, %%mm4 \n\t" \
821 "packssdw %%mm5, %%mm5 \n\t" \
822 "movd %%mm4, 64+" #dst " \n\t"\
823 "movd %%mm5, 80+" #dst " \n\t"
827 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
828 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
829 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
830 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
/*
 * IDCT (variant reading src0 only, two quadwords per invocation): inline-asm
 * text that handles the quadwords at src0 and 8+src0 together and writes
 * full 64-bit results. The src4, src1 and src5 parameters are accepted but
 * never referenced.
 * NOTE(review): presumably selected when every row other than src0 is known
 * to be zero; the dispatching labels are not visible in this listing.
 *
 *   src0               memory operand; src0 is loaded into mm0 and 8+src0
 *                      into mm2
 *   src4, src1, src5   unused
 *   dst                memory operand; quadwords are written with movq at
 *                      dst + 0, 16, 32, 48, 64, 80, 96 and 112
 *   shift              immediate count used by every psrad; the invocations
 *                      that follow pass 20
 *
 * Only two distinct result quadwords are produced: the products with 16(%2)
 * (mm4) go to dst + 0, 48, 64 and 112, and the products with 24(%2) (mm0) go
 * to dst + 16, 32, 80 and 96.
 *
 * NOTE(review): the load of 32(%2) into mm7 has no consumer in this macro.
 *
 * Because each invocation covers 16 bytes of input, this variant is invoked
 * twice (dst 0(%0) and 8(%0)) instead of four times. Writes mm0, mm1, mm2,
 * mm4, mm5 and mm7.
 */
837 #define IDCT(src0, src4, src1, src5, dst, shift) \
838 "movq " #src0 ", %%mm0 \n\t" \
839 "movq 16(%2), %%mm4 \n\t" \
840 "pmaddwd %%mm0, %%mm4 \n\t" \
841 "movq 24(%2), %%mm5 \n\t" \
842 "pmaddwd %%mm5, %%mm0 \n\t" \
843 "psrad $" #shift ", %%mm4 \n\t"\
844 "psrad $" #shift ", %%mm0 \n\t"\
845 "movq 8+" #src0 ", %%mm2 \n\t" \
846 "movq 16(%2), %%mm1 \n\t" \
847 "pmaddwd %%mm2, %%mm1 \n\t" \
848 "movq 24(%2), %%mm7 \n\t" \
849 "pmaddwd %%mm7, %%mm2 \n\t" \
850 "movq 32(%2), %%mm7 \n\t" \
851 "psrad $" #shift ", %%mm1 \n\t"\
852 "packssdw %%mm1, %%mm4 \n\t" \
853 "movq %%mm4, " #dst " \n\t"\
854 "psrad $" #shift ", %%mm2 \n\t"\
855 "packssdw %%mm2, %%mm0 \n\t" \
856 "movq %%mm0, 16+" #dst " \n\t"\
857 "movq %%mm0, 96+" #dst " \n\t"\
858 "movq %%mm4, 112+" #dst " \n\t"\
859 "movq %%mm0, 32+" #dst " \n\t"\
860 "movq %%mm4, 48+" #dst " \n\t"\
861 "movq %%mm4, 64+" #dst " \n\t"\
862 "movq %%mm0, 80+" #dst " \n\t"
865 IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
867 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
void(* ff_put_pixels_clamped)(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size)
#define DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, rarg, shift)
static void idct(int16_t block[64])
Memory handling functions.
void(* ff_add_pixels_clamped)(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size)
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
#define LOCAL_ALIGNED_8(t, v,...)
#define Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, rarg, shift, bt)
#define DECLARE_ASM_CONST(n, t, v)
Declare a static constant aligned variable appropriate for use in inline assembly code...
void ff_simple_idct_add_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
void ff_simple_idct_mmx(int16_t *block)
void ff_simple_idct_put_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
static const int16_t coeffs[]
#define NAMED_CONSTRAINTS_ADD(...)