00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include "libavcodec/dsputil.h"
00023 #include "libavcodec/simple_idct.h"
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
/*
 * Fixed-point cosine constants for the IDCT.
 * Following the formula in the trailing comments, with i taken as the digit
 * in the macro name: Ci = cos(i*M_PI/16) * sqrt(2) * (1<<14), converted to an
 * integer (i.e. 14 fractional bits).
 */
00035 #define C0 23170 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
00036 #define C1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
00037 #define C2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
00038 #define C3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
/* C4 is exactly 16384 by the formula; the active branch deliberately uses one
 * less (note the "- 0.5" instead of "+ 0.5"). The exact value is kept in the
 * disabled branch for reference. */
00039 #if 0
00040 #define C4 16384 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
00041 #else
00042 #define C4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) - 0.5
00043 #endif
00044 #define C5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
00045 #define C6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
00046 #define C7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
00047
/*
 * Right-shift amounts applied to the results of the row pass and of the
 * column pass (see the disabled C reference functions idctRow/idctCol).
 * ROW_SHIFT also sizes the rounding terms stored at the start of coeffs[].
 * Note that the MMX macro invocations in idct() pass the literals 11 and 20
 * rather than these names, so the two must be kept in sync by hand.
 */
00048 #define ROW_SHIFT 11
00049 #define COL_SHIFT 20 // 6
00050
/*
 * 64-bit constants that the inline assembly in idct() references by symbol
 * name through MANGLE().
 *
 * wm1010 - mask that keeps the upper 16-bit word of each 32-bit half and
 *          clears the lower one. DC_COND_IDCT ANDs it with the first source
 *          quadword before OR-ing in the other three, so words 0 and 2 of
 *          that first quadword are ignored by its "everything else is zero"
 *          test.
 * d40000 - value added to mm0 between "pslld $16" and "psrad $13" on the
 *          shortcut path (label 1) of DC_COND_IDCT.
 *
 * NOTE(review): the first DECLARE_ASM_CONST argument is presumably the
 * alignment in bytes -- confirm against the macro definition.
 */
00051 DECLARE_ASM_CONST(8, uint64_t, wm1010)= 0xFFFF0000FFFF0000ULL;
00052 DECLARE_ASM_CONST(8, uint64_t, d40000)= 0x0000000000040000ULL;
00053
/*
 * Constant table for the MMX IDCT, laid out as rows of four int16_t
 * (8 bytes per row). The inline assembly in idct() reads its constants by
 * byte offset from one operand: "(%2)" and "8(%2)" for the rounders and
 * "16(%2)" .. "104(%2)" for the multipliers fed to pmaddwd. Presumably that
 * operand is this table -- the operand list lies outside this excerpt, so
 * confirm there. Because of those hard-coded offsets the order and size of
 * the rows must not change.
 */
00054 DECLARE_ALIGNED(8, static const int16_t, coeffs[])= {
/* byte offset 0: row-pass rounding term, 1<<(ROW_SHIFT-1) in the low word of
 * each 32-bit half */
00055 1<<(ROW_SHIFT-1), 0, 1<<(ROW_SHIFT-1), 0,
00056
00057
/* byte offset 8: same rounding term plus a 1 in the second word; read as a
 * little-endian 32-bit value that adds 1<<16 to the first half. This variant
 * is the one passed as "paddd 8(%2)" for the first group of rows.
 * NOTE(review): 1<<16 equals ((1<<(COL_SHIFT-1))/C4)<<ROW_SHIFT, which looks
 * like the column-pass rounding folded into the first row -- confirm. */
00058 1<<(ROW_SHIFT-1), 1, 1<<(ROW_SHIFT-1), 0,
00059
00060
00061
00062
/* byte offsets 16 and 24: C4 pairs */
00063 C4, C4, C4, C4,
00064 C4, -C4, C4, -C4,
00065
/* byte offsets 32 and 40: C2/C6 pairs */
00066 C2, C6, C2, C6,
00067 C6, -C2, C6, -C2,
00068
/* byte offsets 48 and 56: odd-term pairs (same multipliers as b0 in the
 * disabled C reference) */
00069 C1, C3, C1, C3,
00070 C5, C7, C5, C7,
00071
/* byte offsets 64 and 72: odd-term pairs (same multipliers as b1) */
00072 C3, -C7, C3, -C7,
00073 -C1, -C5, -C1, -C5,
00074
/* byte offsets 80 and 88: odd-term pairs (same multipliers as b2) */
00075 C5, -C1, C5, -C1,
00076 C7, C3, C7, C3,
00077
/* byte offsets 96 and 104: odd-term pairs (same multipliers as b3) */
00078 C7, -C5, C7, -C5,
00079 C3, -C1, C3, -C1
00080 };
00081
00082 #if 0
00083 static void unused_var_killer(void)
00084 {
00085 int a= wm1010 + d40000;
00086 temp[0]=a;
00087 }
00088
00089 static void inline idctCol (int16_t * col, int16_t *input)
00090 {
00091 #undef C0
00092 #undef C1
00093 #undef C2
00094 #undef C3
00095 #undef C4
00096 #undef C5
00097 #undef C6
00098 #undef C7
00099 int a0, a1, a2, a3, b0, b1, b2, b3;
00100 const int C0 = 23170;
00101 const int C1 = 22725;
00102 const int C2 = 21407;
00103 const int C3 = 19266;
00104 const int C4 = 16383;
00105 const int C5 = 12873;
00106 const int C6 = 8867;
00107 const int C7 = 4520;
00108
00109
00110
00111
00112
00113
00114
00115 col[8*0] = input[8*0 + 0];
00116 col[8*1] = input[8*2 + 0];
00117 col[8*2] = input[8*0 + 1];
00118 col[8*3] = input[8*2 + 1];
00119 col[8*4] = input[8*4 + 0];
00120 col[8*5] = input[8*6 + 0];
00121 col[8*6] = input[8*4 + 1];
00122 col[8*7] = input[8*6 + 1];
00123
00124 a0 = C4*col[8*0] + C2*col[8*2] + C4*col[8*4] + C6*col[8*6] + (1<<(COL_SHIFT-1));
00125 a1 = C4*col[8*0] + C6*col[8*2] - C4*col[8*4] - C2*col[8*6] + (1<<(COL_SHIFT-1));
00126 a2 = C4*col[8*0] - C6*col[8*2] - C4*col[8*4] + C2*col[8*6] + (1<<(COL_SHIFT-1));
00127 a3 = C4*col[8*0] - C2*col[8*2] + C4*col[8*4] - C6*col[8*6] + (1<<(COL_SHIFT-1));
00128
00129 b0 = C1*col[8*1] + C3*col[8*3] + C5*col[8*5] + C7*col[8*7];
00130 b1 = C3*col[8*1] - C7*col[8*3] - C1*col[8*5] - C5*col[8*7];
00131 b2 = C5*col[8*1] - C1*col[8*3] + C7*col[8*5] + C3*col[8*7];
00132 b3 = C7*col[8*1] - C5*col[8*3] + C3*col[8*5] - C1*col[8*7];
00133
00134 col[8*0] = (a0 + b0) >> COL_SHIFT;
00135 col[8*1] = (a1 + b1) >> COL_SHIFT;
00136 col[8*2] = (a2 + b2) >> COL_SHIFT;
00137 col[8*3] = (a3 + b3) >> COL_SHIFT;
00138 col[8*4] = (a3 - b3) >> COL_SHIFT;
00139 col[8*5] = (a2 - b2) >> COL_SHIFT;
00140 col[8*6] = (a1 - b1) >> COL_SHIFT;
00141 col[8*7] = (a0 - b0) >> COL_SHIFT;
00142 }
00143
00144 static void inline idctRow (int16_t * output, int16_t * input)
00145 {
00146 int16_t row[8];
00147
00148 int a0, a1, a2, a3, b0, b1, b2, b3;
00149 const int C0 = 23170;
00150 const int C1 = 22725;
00151 const int C2 = 21407;
00152 const int C3 = 19266;
00153 const int C4 = 16383;
00154 const int C5 = 12873;
00155 const int C6 = 8867;
00156 const int C7 = 4520;
00157
00158 row[0] = input[0];
00159 row[2] = input[1];
00160 row[4] = input[4];
00161 row[6] = input[5];
00162 row[1] = input[8];
00163 row[3] = input[9];
00164 row[5] = input[12];
00165 row[7] = input[13];
00166
00167 if( !(row[1] | row[2] |row[3] |row[4] |row[5] |row[6] | row[7]) ) {
00168 row[0] = row[1] = row[2] = row[3] = row[4] =
00169 row[5] = row[6] = row[7] = row[0]<<3;
00170 output[0] = row[0];
00171 output[2] = row[1];
00172 output[4] = row[2];
00173 output[6] = row[3];
00174 output[8] = row[4];
00175 output[10] = row[5];
00176 output[12] = row[6];
00177 output[14] = row[7];
00178 return;
00179 }
00180
00181 a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + (1<<(ROW_SHIFT-1));
00182 a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + (1<<(ROW_SHIFT-1));
00183 a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + (1<<(ROW_SHIFT-1));
00184 a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + (1<<(ROW_SHIFT-1));
00185
00186 b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
00187 b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
00188 b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
00189 b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];
00190
00191 row[0] = (a0 + b0) >> ROW_SHIFT;
00192 row[1] = (a1 + b1) >> ROW_SHIFT;
00193 row[2] = (a2 + b2) >> ROW_SHIFT;
00194 row[3] = (a3 + b3) >> ROW_SHIFT;
00195 row[4] = (a3 - b3) >> ROW_SHIFT;
00196 row[5] = (a2 - b2) >> ROW_SHIFT;
00197 row[6] = (a1 - b1) >> ROW_SHIFT;
00198 row[7] = (a0 - b0) >> ROW_SHIFT;
00199
00200 output[0] = row[0];
00201 output[2] = row[1];
00202 output[4] = row[2];
00203 output[6] = row[3];
00204 output[8] = row[4];
00205 output[10] = row[5];
00206 output[12] = row[6];
00207 output[14] = row[7];
00208 }
00209 #endif
00210
00211 static inline void idct(int16_t *block)
00212 {
00213 DECLARE_ALIGNED(8, int64_t, align_tmp[16]);
00214 int16_t * const temp= (int16_t*)align_tmp;
00215
00216 __asm__ volatile(
00217 #if 0 //Alternative, simpler variant
00218
00219 #define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
00220 "movq " #src0 ", %%mm0 \n\t" \
00221 "movq " #src4 ", %%mm1 \n\t" \
00222 "movq " #src1 ", %%mm2 \n\t" \
00223 "movq " #src5 ", %%mm3 \n\t" \
00224 "movq 16(%2), %%mm4 \n\t" \
00225 "pmaddwd %%mm0, %%mm4 \n\t" \
00226 "movq 24(%2), %%mm5 \n\t" \
00227 "pmaddwd %%mm5, %%mm0 \n\t" \
00228 "movq 32(%2), %%mm5 \n\t" \
00229 "pmaddwd %%mm1, %%mm5 \n\t" \
00230 "movq 40(%2), %%mm6 \n\t" \
00231 "pmaddwd %%mm6, %%mm1 \n\t" \
00232 "movq 48(%2), %%mm7 \n\t" \
00233 "pmaddwd %%mm2, %%mm7 \n\t" \
00234 #rounder ", %%mm4 \n\t"\
00235 "movq %%mm4, %%mm6 \n\t" \
00236 "paddd %%mm5, %%mm4 \n\t" \
00237 "psubd %%mm5, %%mm6 \n\t" \
00238 "movq 56(%2), %%mm5 \n\t" \
00239 "pmaddwd %%mm3, %%mm5 \n\t" \
00240 #rounder ", %%mm0 \n\t"\
00241 "paddd %%mm0, %%mm1 \n\t" \
00242 "paddd %%mm0, %%mm0 \n\t" \
00243 "psubd %%mm1, %%mm0 \n\t" \
00244 "pmaddwd 64(%2), %%mm2 \n\t" \
00245 "paddd %%mm5, %%mm7 \n\t" \
00246 "movq 72(%2), %%mm5 \n\t" \
00247 "pmaddwd %%mm3, %%mm5 \n\t" \
00248 "paddd %%mm4, %%mm7 \n\t" \
00249 "paddd %%mm4, %%mm4 \n\t" \
00250 "psubd %%mm7, %%mm4 \n\t" \
00251 "paddd %%mm2, %%mm5 \n\t" \
00252 "psrad $" #shift ", %%mm7 \n\t"\
00253 "psrad $" #shift ", %%mm4 \n\t"\
00254 "movq %%mm1, %%mm2 \n\t" \
00255 "paddd %%mm5, %%mm1 \n\t" \
00256 "psubd %%mm5, %%mm2 \n\t" \
00257 "psrad $" #shift ", %%mm1 \n\t"\
00258 "psrad $" #shift ", %%mm2 \n\t"\
00259 "packssdw %%mm1, %%mm7 \n\t" \
00260 "packssdw %%mm4, %%mm2 \n\t" \
00261 "movq %%mm7, " #dst " \n\t"\
00262 "movq " #src1 ", %%mm1 \n\t" \
00263 "movq 80(%2), %%mm4 \n\t" \
00264 "movq %%mm2, 24+" #dst " \n\t"\
00265 "pmaddwd %%mm1, %%mm4 \n\t" \
00266 "movq 88(%2), %%mm7 \n\t" \
00267 "pmaddwd 96(%2), %%mm1 \n\t" \
00268 "pmaddwd %%mm3, %%mm7 \n\t" \
00269 "movq %%mm0, %%mm2 \n\t" \
00270 "pmaddwd 104(%2), %%mm3 \n\t" \
00271 "paddd %%mm7, %%mm4 \n\t" \
00272 "paddd %%mm4, %%mm2 \n\t" \
00273 "psubd %%mm4, %%mm0 \n\t" \
00274 "psrad $" #shift ", %%mm2 \n\t"\
00275 "psrad $" #shift ", %%mm0 \n\t"\
00276 "movq %%mm6, %%mm4 \n\t" \
00277 "paddd %%mm1, %%mm3 \n\t" \
00278 "paddd %%mm3, %%mm6 \n\t" \
00279 "psubd %%mm3, %%mm4 \n\t" \
00280 "psrad $" #shift ", %%mm6 \n\t"\
00281 "packssdw %%mm6, %%mm2 \n\t" \
00282 "movq %%mm2, 8+" #dst " \n\t"\
00283 "psrad $" #shift ", %%mm4 \n\t"\
00284 "packssdw %%mm0, %%mm4 \n\t" \
00285 "movq %%mm4, 16+" #dst " \n\t"\
00286
00287 #define COL_IDCT(src0, src4, src1, src5, dst, shift) \
00288 "movq " #src0 ", %%mm0 \n\t" \
00289 "movq " #src4 ", %%mm1 \n\t" \
00290 "movq " #src1 ", %%mm2 \n\t" \
00291 "movq " #src5 ", %%mm3 \n\t" \
00292 "movq 16(%2), %%mm4 \n\t" \
00293 "pmaddwd %%mm0, %%mm4 \n\t" \
00294 "movq 24(%2), %%mm5 \n\t" \
00295 "pmaddwd %%mm5, %%mm0 \n\t" \
00296 "movq 32(%2), %%mm5 \n\t" \
00297 "pmaddwd %%mm1, %%mm5 \n\t" \
00298 "movq 40(%2), %%mm6 \n\t" \
00299 "pmaddwd %%mm6, %%mm1 \n\t" \
00300 "movq %%mm4, %%mm6 \n\t" \
00301 "movq 48(%2), %%mm7 \n\t" \
00302 "pmaddwd %%mm2, %%mm7 \n\t" \
00303 "paddd %%mm5, %%mm4 \n\t" \
00304 "psubd %%mm5, %%mm6 \n\t" \
00305 "movq %%mm0, %%mm5 \n\t" \
00306 "paddd %%mm1, %%mm0 \n\t" \
00307 "psubd %%mm1, %%mm5 \n\t" \
00308 "movq 56(%2), %%mm1 \n\t" \
00309 "pmaddwd %%mm3, %%mm1 \n\t" \
00310 "pmaddwd 64(%2), %%mm2 \n\t" \
00311 "paddd %%mm1, %%mm7 \n\t" \
00312 "movq 72(%2), %%mm1 \n\t" \
00313 "pmaddwd %%mm3, %%mm1 \n\t" \
00314 "paddd %%mm4, %%mm7 \n\t" \
00315 "paddd %%mm4, %%mm4 \n\t" \
00316 "psubd %%mm7, %%mm4 \n\t" \
00317 "paddd %%mm2, %%mm1 \n\t" \
00318 "psrad $" #shift ", %%mm7 \n\t"\
00319 "psrad $" #shift ", %%mm4 \n\t"\
00320 "movq %%mm0, %%mm2 \n\t" \
00321 "paddd %%mm1, %%mm0 \n\t" \
00322 "psubd %%mm1, %%mm2 \n\t" \
00323 "psrad $" #shift ", %%mm0 \n\t"\
00324 "psrad $" #shift ", %%mm2 \n\t"\
00325 "packssdw %%mm7, %%mm7 \n\t" \
00326 "movd %%mm7, " #dst " \n\t"\
00327 "packssdw %%mm0, %%mm0 \n\t" \
00328 "movd %%mm0, 16+" #dst " \n\t"\
00329 "packssdw %%mm2, %%mm2 \n\t" \
00330 "movd %%mm2, 96+" #dst " \n\t"\
00331 "packssdw %%mm4, %%mm4 \n\t" \
00332 "movd %%mm4, 112+" #dst " \n\t"\
00333 "movq " #src1 ", %%mm0 \n\t" \
00334 "movq 80(%2), %%mm4 \n\t" \
00335 "pmaddwd %%mm0, %%mm4 \n\t" \
00336 "movq 88(%2), %%mm7 \n\t" \
00337 "pmaddwd 96(%2), %%mm0 \n\t" \
00338 "pmaddwd %%mm3, %%mm7 \n\t" \
00339 "movq %%mm5, %%mm2 \n\t" \
00340 "pmaddwd 104(%2), %%mm3 \n\t" \
00341 "paddd %%mm7, %%mm4 \n\t" \
00342 "paddd %%mm4, %%mm2 \n\t" \
00343 "psubd %%mm4, %%mm5 \n\t" \
00344 "psrad $" #shift ", %%mm2 \n\t"\
00345 "psrad $" #shift ", %%mm5 \n\t"\
00346 "movq %%mm6, %%mm4 \n\t" \
00347 "paddd %%mm0, %%mm3 \n\t" \
00348 "paddd %%mm3, %%mm6 \n\t" \
00349 "psubd %%mm3, %%mm4 \n\t" \
00350 "psrad $" #shift ", %%mm6 \n\t"\
00351 "psrad $" #shift ", %%mm4 \n\t"\
00352 "packssdw %%mm2, %%mm2 \n\t" \
00353 "packssdw %%mm6, %%mm6 \n\t" \
00354 "movd %%mm2, 32+" #dst " \n\t"\
00355 "packssdw %%mm4, %%mm4 \n\t" \
00356 "packssdw %%mm5, %%mm5 \n\t" \
00357 "movd %%mm6, 48+" #dst " \n\t"\
00358 "movd %%mm4, 64+" #dst " \n\t"\
00359 "movd %%mm5, 80+" #dst " \n\t"\
00360
00361
00362 #define DC_COND_ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
00363 "movq " #src0 ", %%mm0 \n\t" \
00364 "movq " #src4 ", %%mm1 \n\t" \
00365 "movq " #src1 ", %%mm2 \n\t" \
00366 "movq " #src5 ", %%mm3 \n\t" \
00367 "movq "MANGLE(wm1010)", %%mm4 \n\t"\
00368 "pand %%mm0, %%mm4 \n\t"\
00369 "por %%mm1, %%mm4 \n\t"\
00370 "por %%mm2, %%mm4 \n\t"\
00371 "por %%mm3, %%mm4 \n\t"\
00372 "packssdw %%mm4,%%mm4 \n\t"\
00373 "movd %%mm4, %%eax \n\t"\
00374 "orl %%eax, %%eax \n\t"\
00375 "jz 1f \n\t"\
00376 "movq 16(%2), %%mm4 \n\t" \
00377 "pmaddwd %%mm0, %%mm4 \n\t" \
00378 "movq 24(%2), %%mm5 \n\t" \
00379 "pmaddwd %%mm5, %%mm0 \n\t" \
00380 "movq 32(%2), %%mm5 \n\t" \
00381 "pmaddwd %%mm1, %%mm5 \n\t" \
00382 "movq 40(%2), %%mm6 \n\t" \
00383 "pmaddwd %%mm6, %%mm1 \n\t" \
00384 "movq 48(%2), %%mm7 \n\t" \
00385 "pmaddwd %%mm2, %%mm7 \n\t" \
00386 #rounder ", %%mm4 \n\t"\
00387 "movq %%mm4, %%mm6 \n\t" \
00388 "paddd %%mm5, %%mm4 \n\t" \
00389 "psubd %%mm5, %%mm6 \n\t" \
00390 "movq 56(%2), %%mm5 \n\t" \
00391 "pmaddwd %%mm3, %%mm5 \n\t" \
00392 #rounder ", %%mm0 \n\t"\
00393 "paddd %%mm0, %%mm1 \n\t" \
00394 "paddd %%mm0, %%mm0 \n\t" \
00395 "psubd %%mm1, %%mm0 \n\t" \
00396 "pmaddwd 64(%2), %%mm2 \n\t" \
00397 "paddd %%mm5, %%mm7 \n\t" \
00398 "movq 72(%2), %%mm5 \n\t" \
00399 "pmaddwd %%mm3, %%mm5 \n\t" \
00400 "paddd %%mm4, %%mm7 \n\t" \
00401 "paddd %%mm4, %%mm4 \n\t" \
00402 "psubd %%mm7, %%mm4 \n\t" \
00403 "paddd %%mm2, %%mm5 \n\t" \
00404 "psrad $" #shift ", %%mm7 \n\t"\
00405 "psrad $" #shift ", %%mm4 \n\t"\
00406 "movq %%mm1, %%mm2 \n\t" \
00407 "paddd %%mm5, %%mm1 \n\t" \
00408 "psubd %%mm5, %%mm2 \n\t" \
00409 "psrad $" #shift ", %%mm1 \n\t"\
00410 "psrad $" #shift ", %%mm2 \n\t"\
00411 "packssdw %%mm1, %%mm7 \n\t" \
00412 "packssdw %%mm4, %%mm2 \n\t" \
00413 "movq %%mm7, " #dst " \n\t"\
00414 "movq " #src1 ", %%mm1 \n\t" \
00415 "movq 80(%2), %%mm4 \n\t" \
00416 "movq %%mm2, 24+" #dst " \n\t"\
00417 "pmaddwd %%mm1, %%mm4 \n\t" \
00418 "movq 88(%2), %%mm7 \n\t" \
00419 "pmaddwd 96(%2), %%mm1 \n\t" \
00420 "pmaddwd %%mm3, %%mm7 \n\t" \
00421 "movq %%mm0, %%mm2 \n\t" \
00422 "pmaddwd 104(%2), %%mm3 \n\t" \
00423 "paddd %%mm7, %%mm4 \n\t" \
00424 "paddd %%mm4, %%mm2 \n\t" \
00425 "psubd %%mm4, %%mm0 \n\t" \
00426 "psrad $" #shift ", %%mm2 \n\t"\
00427 "psrad $" #shift ", %%mm0 \n\t"\
00428 "movq %%mm6, %%mm4 \n\t" \
00429 "paddd %%mm1, %%mm3 \n\t" \
00430 "paddd %%mm3, %%mm6 \n\t" \
00431 "psubd %%mm3, %%mm4 \n\t" \
00432 "psrad $" #shift ", %%mm6 \n\t"\
00433 "packssdw %%mm6, %%mm2 \n\t" \
00434 "movq %%mm2, 8+" #dst " \n\t"\
00435 "psrad $" #shift ", %%mm4 \n\t"\
00436 "packssdw %%mm0, %%mm4 \n\t" \
00437 "movq %%mm4, 16+" #dst " \n\t"\
00438 "jmp 2f \n\t"\
00439 "1: \n\t"\
00440 "pslld $16, %%mm0 \n\t"\
00441 "#paddd "MANGLE(d40000)", %%mm0 \n\t"\
00442 "psrad $13, %%mm0 \n\t"\
00443 "packssdw %%mm0, %%mm0 \n\t"\
00444 "movq %%mm0, " #dst " \n\t"\
00445 "movq %%mm0, 8+" #dst " \n\t"\
00446 "movq %%mm0, 16+" #dst " \n\t"\
00447 "movq %%mm0, 24+" #dst " \n\t"\
00448 "2: \n\t"
00449
00450
00451
00452 ROW_IDCT( (%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11)
00453
00454
00455
00456
00457 DC_COND_ROW_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11)
00458 DC_COND_ROW_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11)
00459 DC_COND_ROW_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11)
00460
00461
00462
00463 COL_IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
00464 COL_IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
00465 COL_IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
00466 COL_IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
00467
00468 #else
00469
00470 #define DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
00471 "movq " #src0 ", %%mm0 \n\t" \
00472 "movq " #src4 ", %%mm1 \n\t" \
00473 "movq " #src1 ", %%mm2 \n\t" \
00474 "movq " #src5 ", %%mm3 \n\t" \
00475 "movq "MANGLE(wm1010)", %%mm4 \n\t"\
00476 "pand %%mm0, %%mm4 \n\t"\
00477 "por %%mm1, %%mm4 \n\t"\
00478 "por %%mm2, %%mm4 \n\t"\
00479 "por %%mm3, %%mm4 \n\t"\
00480 "packssdw %%mm4,%%mm4 \n\t"\
00481 "movd %%mm4, %%eax \n\t"\
00482 "orl %%eax, %%eax \n\t"\
00483 "jz 1f \n\t"\
00484 "movq 16(%2), %%mm4 \n\t" \
00485 "pmaddwd %%mm0, %%mm4 \n\t" \
00486 "movq 24(%2), %%mm5 \n\t" \
00487 "pmaddwd %%mm5, %%mm0 \n\t" \
00488 "movq 32(%2), %%mm5 \n\t" \
00489 "pmaddwd %%mm1, %%mm5 \n\t" \
00490 "movq 40(%2), %%mm6 \n\t" \
00491 "pmaddwd %%mm6, %%mm1 \n\t" \
00492 "movq 48(%2), %%mm7 \n\t" \
00493 "pmaddwd %%mm2, %%mm7 \n\t" \
00494 #rounder ", %%mm4 \n\t"\
00495 "movq %%mm4, %%mm6 \n\t" \
00496 "paddd %%mm5, %%mm4 \n\t" \
00497 "psubd %%mm5, %%mm6 \n\t" \
00498 "movq 56(%2), %%mm5 \n\t" \
00499 "pmaddwd %%mm3, %%mm5 \n\t" \
00500 #rounder ", %%mm0 \n\t"\
00501 "paddd %%mm0, %%mm1 \n\t" \
00502 "paddd %%mm0, %%mm0 \n\t" \
00503 "psubd %%mm1, %%mm0 \n\t" \
00504 "pmaddwd 64(%2), %%mm2 \n\t" \
00505 "paddd %%mm5, %%mm7 \n\t" \
00506 "movq 72(%2), %%mm5 \n\t" \
00507 "pmaddwd %%mm3, %%mm5 \n\t" \
00508 "paddd %%mm4, %%mm7 \n\t" \
00509 "paddd %%mm4, %%mm4 \n\t" \
00510 "psubd %%mm7, %%mm4 \n\t" \
00511 "paddd %%mm2, %%mm5 \n\t" \
00512 "psrad $" #shift ", %%mm7 \n\t"\
00513 "psrad $" #shift ", %%mm4 \n\t"\
00514 "movq %%mm1, %%mm2 \n\t" \
00515 "paddd %%mm5, %%mm1 \n\t" \
00516 "psubd %%mm5, %%mm2 \n\t" \
00517 "psrad $" #shift ", %%mm1 \n\t"\
00518 "psrad $" #shift ", %%mm2 \n\t"\
00519 "packssdw %%mm1, %%mm7 \n\t" \
00520 "packssdw %%mm4, %%mm2 \n\t" \
00521 "movq %%mm7, " #dst " \n\t"\
00522 "movq " #src1 ", %%mm1 \n\t" \
00523 "movq 80(%2), %%mm4 \n\t" \
00524 "movq %%mm2, 24+" #dst " \n\t"\
00525 "pmaddwd %%mm1, %%mm4 \n\t" \
00526 "movq 88(%2), %%mm7 \n\t" \
00527 "pmaddwd 96(%2), %%mm1 \n\t" \
00528 "pmaddwd %%mm3, %%mm7 \n\t" \
00529 "movq %%mm0, %%mm2 \n\t" \
00530 "pmaddwd 104(%2), %%mm3 \n\t" \
00531 "paddd %%mm7, %%mm4 \n\t" \
00532 "paddd %%mm4, %%mm2 \n\t" \
00533 "psubd %%mm4, %%mm0 \n\t" \
00534 "psrad $" #shift ", %%mm2 \n\t"\
00535 "psrad $" #shift ", %%mm0 \n\t"\
00536 "movq %%mm6, %%mm4 \n\t" \
00537 "paddd %%mm1, %%mm3 \n\t" \
00538 "paddd %%mm3, %%mm6 \n\t" \
00539 "psubd %%mm3, %%mm4 \n\t" \
00540 "psrad $" #shift ", %%mm6 \n\t"\
00541 "packssdw %%mm6, %%mm2 \n\t" \
00542 "movq %%mm2, 8+" #dst " \n\t"\
00543 "psrad $" #shift ", %%mm4 \n\t"\
00544 "packssdw %%mm0, %%mm4 \n\t" \
00545 "movq %%mm4, 16+" #dst " \n\t"\
00546 "jmp 2f \n\t"\
00547 "1: \n\t"\
00548 "pslld $16, %%mm0 \n\t"\
00549 "paddd "MANGLE(d40000)", %%mm0 \n\t"\
00550 "psrad $13, %%mm0 \n\t"\
00551 "packssdw %%mm0, %%mm0 \n\t"\
00552 "movq %%mm0, " #dst " \n\t"\
00553 "movq %%mm0, 8+" #dst " \n\t"\
00554 "movq %%mm0, 16+" #dst " \n\t"\
00555 "movq %%mm0, 24+" #dst " \n\t"\
00556 "2: \n\t"
00557
00558 #define Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift, bt) \
00559 "movq " #src0 ", %%mm0 \n\t" \
00560 "movq " #src4 ", %%mm1 \n\t" \
00561 "movq " #src1 ", %%mm2 \n\t" \
00562 "movq " #src5 ", %%mm3 \n\t" \
00563 "movq %%mm0, %%mm4 \n\t"\
00564 "por %%mm1, %%mm4 \n\t"\
00565 "por %%mm2, %%mm4 \n\t"\
00566 "por %%mm3, %%mm4 \n\t"\
00567 "packssdw %%mm4,%%mm4 \n\t"\
00568 "movd %%mm4, %%eax \n\t"\
00569 "orl %%eax, %%eax \n\t"\
00570 "jz " #bt " \n\t"\
00571 "movq 16(%2), %%mm4 \n\t" \
00572 "pmaddwd %%mm0, %%mm4 \n\t" \
00573 "movq 24(%2), %%mm5 \n\t" \
00574 "pmaddwd %%mm5, %%mm0 \n\t" \
00575 "movq 32(%2), %%mm5 \n\t" \
00576 "pmaddwd %%mm1, %%mm5 \n\t" \
00577 "movq 40(%2), %%mm6 \n\t" \
00578 "pmaddwd %%mm6, %%mm1 \n\t" \
00579 "movq 48(%2), %%mm7 \n\t" \
00580 "pmaddwd %%mm2, %%mm7 \n\t" \
00581 #rounder ", %%mm4 \n\t"\
00582 "movq %%mm4, %%mm6 \n\t" \
00583 "paddd %%mm5, %%mm4 \n\t" \
00584 "psubd %%mm5, %%mm6 \n\t" \
00585 "movq 56(%2), %%mm5 \n\t" \
00586 "pmaddwd %%mm3, %%mm5 \n\t" \
00587 #rounder ", %%mm0 \n\t"\
00588 "paddd %%mm0, %%mm1 \n\t" \
00589 "paddd %%mm0, %%mm0 \n\t" \
00590 "psubd %%mm1, %%mm0 \n\t" \
00591 "pmaddwd 64(%2), %%mm2 \n\t" \
00592 "paddd %%mm5, %%mm7 \n\t" \
00593 "movq 72(%2), %%mm5 \n\t" \
00594 "pmaddwd %%mm3, %%mm5 \n\t" \
00595 "paddd %%mm4, %%mm7 \n\t" \
00596 "paddd %%mm4, %%mm4 \n\t" \
00597 "psubd %%mm7, %%mm4 \n\t" \
00598 "paddd %%mm2, %%mm5 \n\t" \
00599 "psrad $" #shift ", %%mm7 \n\t"\
00600 "psrad $" #shift ", %%mm4 \n\t"\
00601 "movq %%mm1, %%mm2 \n\t" \
00602 "paddd %%mm5, %%mm1 \n\t" \
00603 "psubd %%mm5, %%mm2 \n\t" \
00604 "psrad $" #shift ", %%mm1 \n\t"\
00605 "psrad $" #shift ", %%mm2 \n\t"\
00606 "packssdw %%mm1, %%mm7 \n\t" \
00607 "packssdw %%mm4, %%mm2 \n\t" \
00608 "movq %%mm7, " #dst " \n\t"\
00609 "movq " #src1 ", %%mm1 \n\t" \
00610 "movq 80(%2), %%mm4 \n\t" \
00611 "movq %%mm2, 24+" #dst " \n\t"\
00612 "pmaddwd %%mm1, %%mm4 \n\t" \
00613 "movq 88(%2), %%mm7 \n\t" \
00614 "pmaddwd 96(%2), %%mm1 \n\t" \
00615 "pmaddwd %%mm3, %%mm7 \n\t" \
00616 "movq %%mm0, %%mm2 \n\t" \
00617 "pmaddwd 104(%2), %%mm3 \n\t" \
00618 "paddd %%mm7, %%mm4 \n\t" \
00619 "paddd %%mm4, %%mm2 \n\t" \
00620 "psubd %%mm4, %%mm0 \n\t" \
00621 "psrad $" #shift ", %%mm2 \n\t"\
00622 "psrad $" #shift ", %%mm0 \n\t"\
00623 "movq %%mm6, %%mm4 \n\t" \
00624 "paddd %%mm1, %%mm3 \n\t" \
00625 "paddd %%mm3, %%mm6 \n\t" \
00626 "psubd %%mm3, %%mm4 \n\t" \
00627 "psrad $" #shift ", %%mm6 \n\t"\
00628 "packssdw %%mm6, %%mm2 \n\t" \
00629 "movq %%mm2, 8+" #dst " \n\t"\
00630 "psrad $" #shift ", %%mm4 \n\t"\
00631 "packssdw %%mm0, %%mm4 \n\t" \
00632 "movq %%mm4, 16+" #dst " \n\t"\
00633
00634 #define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
00635 "movq " #src0 ", %%mm0 \n\t" \
00636 "movq " #src4 ", %%mm1 \n\t" \
00637 "movq " #src1 ", %%mm2 \n\t" \
00638 "movq " #src5 ", %%mm3 \n\t" \
00639 "movq 16(%2), %%mm4 \n\t" \
00640 "pmaddwd %%mm0, %%mm4 \n\t" \
00641 "movq 24(%2), %%mm5 \n\t" \
00642 "pmaddwd %%mm5, %%mm0 \n\t" \
00643 "movq 32(%2), %%mm5 \n\t" \
00644 "pmaddwd %%mm1, %%mm5 \n\t" \
00645 "movq 40(%2), %%mm6 \n\t" \
00646 "pmaddwd %%mm6, %%mm1 \n\t" \
00647 "movq 48(%2), %%mm7 \n\t" \
00648 "pmaddwd %%mm2, %%mm7 \n\t" \
00649 #rounder ", %%mm4 \n\t"\
00650 "movq %%mm4, %%mm6 \n\t" \
00651 "paddd %%mm5, %%mm4 \n\t" \
00652 "psubd %%mm5, %%mm6 \n\t" \
00653 "movq 56(%2), %%mm5 \n\t" \
00654 "pmaddwd %%mm3, %%mm5 \n\t" \
00655 #rounder ", %%mm0 \n\t"\
00656 "paddd %%mm0, %%mm1 \n\t" \
00657 "paddd %%mm0, %%mm0 \n\t" \
00658 "psubd %%mm1, %%mm0 \n\t" \
00659 "pmaddwd 64(%2), %%mm2 \n\t" \
00660 "paddd %%mm5, %%mm7 \n\t" \
00661 "movq 72(%2), %%mm5 \n\t" \
00662 "pmaddwd %%mm3, %%mm5 \n\t" \
00663 "paddd %%mm4, %%mm7 \n\t" \
00664 "paddd %%mm4, %%mm4 \n\t" \
00665 "psubd %%mm7, %%mm4 \n\t" \
00666 "paddd %%mm2, %%mm5 \n\t" \
00667 "psrad $" #shift ", %%mm7 \n\t"\
00668 "psrad $" #shift ", %%mm4 \n\t"\
00669 "movq %%mm1, %%mm2 \n\t" \
00670 "paddd %%mm5, %%mm1 \n\t" \
00671 "psubd %%mm5, %%mm2 \n\t" \
00672 "psrad $" #shift ", %%mm1 \n\t"\
00673 "psrad $" #shift ", %%mm2 \n\t"\
00674 "packssdw %%mm1, %%mm7 \n\t" \
00675 "packssdw %%mm4, %%mm2 \n\t" \
00676 "movq %%mm7, " #dst " \n\t"\
00677 "movq " #src1 ", %%mm1 \n\t" \
00678 "movq 80(%2), %%mm4 \n\t" \
00679 "movq %%mm2, 24+" #dst " \n\t"\
00680 "pmaddwd %%mm1, %%mm4 \n\t" \
00681 "movq 88(%2), %%mm7 \n\t" \
00682 "pmaddwd 96(%2), %%mm1 \n\t" \
00683 "pmaddwd %%mm3, %%mm7 \n\t" \
00684 "movq %%mm0, %%mm2 \n\t" \
00685 "pmaddwd 104(%2), %%mm3 \n\t" \
00686 "paddd %%mm7, %%mm4 \n\t" \
00687 "paddd %%mm4, %%mm2 \n\t" \
00688 "psubd %%mm4, %%mm0 \n\t" \
00689 "psrad $" #shift ", %%mm2 \n\t"\
00690 "psrad $" #shift ", %%mm0 \n\t"\
00691 "movq %%mm6, %%mm4 \n\t" \
00692 "paddd %%mm1, %%mm3 \n\t" \
00693 "paddd %%mm3, %%mm6 \n\t" \
00694 "psubd %%mm3, %%mm4 \n\t" \
00695 "psrad $" #shift ", %%mm6 \n\t"\
00696 "packssdw %%mm6, %%mm2 \n\t" \
00697 "movq %%mm2, 8+" #dst " \n\t"\
00698 "psrad $" #shift ", %%mm4 \n\t"\
00699 "packssdw %%mm0, %%mm4 \n\t" \
00700 "movq %%mm4, 16+" #dst " \n\t"\
00701
00702
00703 DC_COND_IDCT( 0(%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11)
00704 Z_COND_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11, 4f)
00705 Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 2f)
00706 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 1f)
00707
00708 #undef IDCT
00709 #define IDCT(src0, src4, src1, src5, dst, shift) \
00710 "movq " #src0 ", %%mm0 \n\t" \
00711 "movq " #src4 ", %%mm1 \n\t" \
00712 "movq " #src1 ", %%mm2 \n\t" \
00713 "movq " #src5 ", %%mm3 \n\t" \
00714 "movq 16(%2), %%mm4 \n\t" \
00715 "pmaddwd %%mm0, %%mm4 \n\t" \
00716 "movq 24(%2), %%mm5 \n\t" \
00717 "pmaddwd %%mm5, %%mm0 \n\t" \
00718 "movq 32(%2), %%mm5 \n\t" \
00719 "pmaddwd %%mm1, %%mm5 \n\t" \
00720 "movq 40(%2), %%mm6 \n\t" \
00721 "pmaddwd %%mm6, %%mm1 \n\t" \
00722 "movq %%mm4, %%mm6 \n\t" \
00723 "movq 48(%2), %%mm7 \n\t" \
00724 "pmaddwd %%mm2, %%mm7 \n\t" \
00725 "paddd %%mm5, %%mm4 \n\t" \
00726 "psubd %%mm5, %%mm6 \n\t" \
00727 "movq %%mm0, %%mm5 \n\t" \
00728 "paddd %%mm1, %%mm0 \n\t" \
00729 "psubd %%mm1, %%mm5 \n\t" \
00730 "movq 56(%2), %%mm1 \n\t" \
00731 "pmaddwd %%mm3, %%mm1 \n\t" \
00732 "pmaddwd 64(%2), %%mm2 \n\t" \
00733 "paddd %%mm1, %%mm7 \n\t" \
00734 "movq 72(%2), %%mm1 \n\t" \
00735 "pmaddwd %%mm3, %%mm1 \n\t" \
00736 "paddd %%mm4, %%mm7 \n\t" \
00737 "paddd %%mm4, %%mm4 \n\t" \
00738 "psubd %%mm7, %%mm4 \n\t" \
00739 "paddd %%mm2, %%mm1 \n\t" \
00740 "psrad $" #shift ", %%mm7 \n\t"\
00741 "psrad $" #shift ", %%mm4 \n\t"\
00742 "movq %%mm0, %%mm2 \n\t" \
00743 "paddd %%mm1, %%mm0 \n\t" \
00744 "psubd %%mm1, %%mm2 \n\t" \
00745 "psrad $" #shift ", %%mm0 \n\t"\
00746 "psrad $" #shift ", %%mm2 \n\t"\
00747 "packssdw %%mm7, %%mm7 \n\t" \
00748 "movd %%mm7, " #dst " \n\t"\
00749 "packssdw %%mm0, %%mm0 \n\t" \
00750 "movd %%mm0, 16+" #dst " \n\t"\
00751 "packssdw %%mm2, %%mm2 \n\t" \
00752 "movd %%mm2, 96+" #dst " \n\t"\
00753 "packssdw %%mm4, %%mm4 \n\t" \
00754 "movd %%mm4, 112+" #dst " \n\t"\
00755 "movq " #src1 ", %%mm0 \n\t" \
00756 "movq 80(%2), %%mm4 \n\t" \
00757 "pmaddwd %%mm0, %%mm4 \n\t" \
00758 "movq 88(%2), %%mm7 \n\t" \
00759 "pmaddwd 96(%2), %%mm0 \n\t" \
00760 "pmaddwd %%mm3, %%mm7 \n\t" \
00761 "movq %%mm5, %%mm2 \n\t" \
00762 "pmaddwd 104(%2), %%mm3 \n\t" \
00763 "paddd %%mm7, %%mm4 \n\t" \
00764 "paddd %%mm4, %%mm2 \n\t" \
00765 "psubd %%mm4, %%mm5 \n\t" \
00766 "psrad $" #shift ", %%mm2 \n\t"\
00767 "psrad $" #shift ", %%mm5 \n\t"\
00768 "movq %%mm6, %%mm4 \n\t" \
00769 "paddd %%mm0, %%mm3 \n\t" \
00770 "paddd %%mm3, %%mm6 \n\t" \
00771 "psubd %%mm3, %%mm4 \n\t" \
00772 "psrad $" #shift ", %%mm6 \n\t"\
00773 "psrad $" #shift ", %%mm4 \n\t"\
00774 "packssdw %%mm2, %%mm2 \n\t" \
00775 "packssdw %%mm6, %%mm6 \n\t" \
00776 "movd %%mm2, 32+" #dst " \n\t"\
00777 "packssdw %%mm4, %%mm4 \n\t" \
00778 "packssdw %%mm5, %%mm5 \n\t" \
00779 "movd %%mm6, 48+" #dst " \n\t"\
00780 "movd %%mm4, 64+" #dst " \n\t"\
00781 "movd %%mm5, 80+" #dst " \n\t"
00782
00783
00784
00785 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
00786 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
00787 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
00788 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
00789 "jmp 9f \n\t"
00790
00791 "#" ASMALIGN(4) \
00792 "4: \n\t"
00793 Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 6f)
00794 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 5f)
00795
00796 #undef IDCT
00797 #define IDCT(src0, src4, src1, src5, dst, shift) \
00798 "movq " #src0 ", %%mm0 \n\t" \
00799 "movq " #src4 ", %%mm1 \n\t" \
00800 "movq " #src5 ", %%mm3 \n\t" \
00801 "movq 16(%2), %%mm4 \n\t" \
00802 "pmaddwd %%mm0, %%mm4 \n\t" \
00803 "movq 24(%2), %%mm5 \n\t" \
00804 "pmaddwd %%mm5, %%mm0 \n\t" \
00805 "movq 32(%2), %%mm5 \n\t" \
00806 "pmaddwd %%mm1, %%mm5 \n\t" \
00807 "movq 40(%2), %%mm6 \n\t" \
00808 "pmaddwd %%mm6, %%mm1 \n\t" \
00809 "movq %%mm4, %%mm6 \n\t" \
00810 "paddd %%mm5, %%mm4 \n\t" \
00811 "psubd %%mm5, %%mm6 \n\t" \
00812 "movq %%mm0, %%mm5 \n\t" \
00813 "paddd %%mm1, %%mm0 \n\t" \
00814 "psubd %%mm1, %%mm5 \n\t" \
00815 "movq 56(%2), %%mm1 \n\t" \
00816 "pmaddwd %%mm3, %%mm1 \n\t" \
00817 "movq 72(%2), %%mm7 \n\t" \
00818 "pmaddwd %%mm3, %%mm7 \n\t" \
00819 "paddd %%mm4, %%mm1 \n\t" \
00820 "paddd %%mm4, %%mm4 \n\t" \
00821 "psubd %%mm1, %%mm4 \n\t" \
00822 "psrad $" #shift ", %%mm1 \n\t"\
00823 "psrad $" #shift ", %%mm4 \n\t"\
00824 "movq %%mm0, %%mm2 \n\t" \
00825 "paddd %%mm7, %%mm0 \n\t" \
00826 "psubd %%mm7, %%mm2 \n\t" \
00827 "psrad $" #shift ", %%mm0 \n\t"\
00828 "psrad $" #shift ", %%mm2 \n\t"\
00829 "packssdw %%mm1, %%mm1 \n\t" \
00830 "movd %%mm1, " #dst " \n\t"\
00831 "packssdw %%mm0, %%mm0 \n\t" \
00832 "movd %%mm0, 16+" #dst " \n\t"\
00833 "packssdw %%mm2, %%mm2 \n\t" \
00834 "movd %%mm2, 96+" #dst " \n\t"\
00835 "packssdw %%mm4, %%mm4 \n\t" \
00836 "movd %%mm4, 112+" #dst " \n\t"\
00837 "movq 88(%2), %%mm1 \n\t" \
00838 "pmaddwd %%mm3, %%mm1 \n\t" \
00839 "movq %%mm5, %%mm2 \n\t" \
00840 "pmaddwd 104(%2), %%mm3 \n\t" \
00841 "paddd %%mm1, %%mm2 \n\t" \
00842 "psubd %%mm1, %%mm5 \n\t" \
00843 "psrad $" #shift ", %%mm2 \n\t"\
00844 "psrad $" #shift ", %%mm5 \n\t"\
00845 "movq %%mm6, %%mm1 \n\t" \
00846 "paddd %%mm3, %%mm6 \n\t" \
00847 "psubd %%mm3, %%mm1 \n\t" \
00848 "psrad $" #shift ", %%mm6 \n\t"\
00849 "psrad $" #shift ", %%mm1 \n\t"\
00850 "packssdw %%mm2, %%mm2 \n\t" \
00851 "packssdw %%mm6, %%mm6 \n\t" \
00852 "movd %%mm2, 32+" #dst " \n\t"\
00853 "packssdw %%mm1, %%mm1 \n\t" \
00854 "packssdw %%mm5, %%mm5 \n\t" \
00855 "movd %%mm6, 48+" #dst " \n\t"\
00856 "movd %%mm1, 64+" #dst " \n\t"\
00857 "movd %%mm5, 80+" #dst " \n\t"
00858
00859
00860 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
00861 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
00862 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
00863 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
00864 "jmp 9f \n\t"
00865
00866 "#" ASMALIGN(4) \
00867 "6: \n\t"
00868 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 7f)
00869
00870 #undef IDCT
00871 #define IDCT(src0, src4, src1, src5, dst, shift) \
00872 "movq " #src0 ", %%mm0 \n\t" \
00873 "movq " #src5 ", %%mm3 \n\t" \
00874 "movq 16(%2), %%mm4 \n\t" \
00875 "pmaddwd %%mm0, %%mm4 \n\t" \
00876 "movq 24(%2), %%mm5 \n\t" \
00877 "pmaddwd %%mm5, %%mm0 \n\t" \
00878 "movq %%mm4, %%mm6 \n\t" \
00879 "movq %%mm0, %%mm5 \n\t" \
00880 "movq 56(%2), %%mm1 \n\t" \
00881 "pmaddwd %%mm3, %%mm1 \n\t" \
00882 "movq 72(%2), %%mm7 \n\t" \
00883 "pmaddwd %%mm3, %%mm7 \n\t" \
00884 "paddd %%mm4, %%mm1 \n\t" \
00885 "paddd %%mm4, %%mm4 \n\t" \
00886 "psubd %%mm1, %%mm4 \n\t" \
00887 "psrad $" #shift ", %%mm1 \n\t"\
00888 "psrad $" #shift ", %%mm4 \n\t"\
00889 "movq %%mm0, %%mm2 \n\t" \
00890 "paddd %%mm7, %%mm0 \n\t" \
00891 "psubd %%mm7, %%mm2 \n\t" \
00892 "psrad $" #shift ", %%mm0 \n\t"\
00893 "psrad $" #shift ", %%mm2 \n\t"\
00894 "packssdw %%mm1, %%mm1 \n\t" \
00895 "movd %%mm1, " #dst " \n\t"\
00896 "packssdw %%mm0, %%mm0 \n\t" \
00897 "movd %%mm0, 16+" #dst " \n\t"\
00898 "packssdw %%mm2, %%mm2 \n\t" \
00899 "movd %%mm2, 96+" #dst " \n\t"\
00900 "packssdw %%mm4, %%mm4 \n\t" \
00901 "movd %%mm4, 112+" #dst " \n\t"\
00902 "movq 88(%2), %%mm1 \n\t" \
00903 "pmaddwd %%mm3, %%mm1 \n\t" \
00904 "movq %%mm5, %%mm2 \n\t" \
00905 "pmaddwd 104(%2), %%mm3 \n\t" \
00906 "paddd %%mm1, %%mm2 \n\t" \
00907 "psubd %%mm1, %%mm5 \n\t" \
00908 "psrad $" #shift ", %%mm2 \n\t"\
00909 "psrad $" #shift ", %%mm5 \n\t"\
00910 "movq %%mm6, %%mm1 \n\t" \
00911 "paddd %%mm3, %%mm6 \n\t" \
00912 "psubd %%mm3, %%mm1 \n\t" \
00913 "psrad $" #shift ", %%mm6 \n\t"\
00914 "psrad $" #shift ", %%mm1 \n\t"\
00915 "packssdw %%mm2, %%mm2 \n\t" \
00916 "packssdw %%mm6, %%mm6 \n\t" \
00917 "movd %%mm2, 32+" #dst " \n\t"\
00918 "packssdw %%mm1, %%mm1 \n\t" \
00919 "packssdw %%mm5, %%mm5 \n\t" \
00920 "movd %%mm6, 48+" #dst " \n\t"\
00921 "movd %%mm1, 64+" #dst " \n\t"\
00922 "movd %%mm5, 80+" #dst " \n\t"
00923
00924
00925
00926 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
00927 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
00928 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
00929 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
00930 "jmp 9f \n\t"
00931
00932 "#" ASMALIGN(4) \
00933 "2: \n\t"
00934 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 3f)
00935
00936 #undef IDCT
00937 #define IDCT(src0, src4, src1, src5, dst, shift) \
00938 "movq " #src0 ", %%mm0 \n\t" \
00939 "movq " #src1 ", %%mm2 \n\t" \
00940 "movq " #src5 ", %%mm3 \n\t" \
00941 "movq 16(%2), %%mm4 \n\t" \
00942 "pmaddwd %%mm0, %%mm4 \n\t" \
00943 "movq 24(%2), %%mm5 \n\t" \
00944 "pmaddwd %%mm5, %%mm0 \n\t" \
00945 "movq %%mm4, %%mm6 \n\t" \
00946 "movq 48(%2), %%mm7 \n\t" \
00947 "pmaddwd %%mm2, %%mm7 \n\t" \
00948 "movq %%mm0, %%mm5 \n\t" \
00949 "movq 56(%2), %%mm1 \n\t" \
00950 "pmaddwd %%mm3, %%mm1 \n\t" \
00951 "pmaddwd 64(%2), %%mm2 \n\t" \
00952 "paddd %%mm1, %%mm7 \n\t" \
00953 "movq 72(%2), %%mm1 \n\t" \
00954 "pmaddwd %%mm3, %%mm1 \n\t" \
00955 "paddd %%mm4, %%mm7 \n\t" \
00956 "paddd %%mm4, %%mm4 \n\t" \
00957 "psubd %%mm7, %%mm4 \n\t" \
00958 "paddd %%mm2, %%mm1 \n\t" \
00959 "psrad $" #shift ", %%mm7 \n\t"\
00960 "psrad $" #shift ", %%mm4 \n\t"\
00961 "movq %%mm0, %%mm2 \n\t" \
00962 "paddd %%mm1, %%mm0 \n\t" \
00963 "psubd %%mm1, %%mm2 \n\t" \
00964 "psrad $" #shift ", %%mm0 \n\t"\
00965 "psrad $" #shift ", %%mm2 \n\t"\
00966 "packssdw %%mm7, %%mm7 \n\t" \
00967 "movd %%mm7, " #dst " \n\t"\
00968 "packssdw %%mm0, %%mm0 \n\t" \
00969 "movd %%mm0, 16+" #dst " \n\t"\
00970 "packssdw %%mm2, %%mm2 \n\t" \
00971 "movd %%mm2, 96+" #dst " \n\t"\
00972 "packssdw %%mm4, %%mm4 \n\t" \
00973 "movd %%mm4, 112+" #dst " \n\t"\
00974 "movq " #src1 ", %%mm0 \n\t" \
00975 "movq 80(%2), %%mm4 \n\t" \
00976 "pmaddwd %%mm0, %%mm4 \n\t" \
00977 "movq 88(%2), %%mm7 \n\t" \
00978 "pmaddwd 96(%2), %%mm0 \n\t" \
00979 "pmaddwd %%mm3, %%mm7 \n\t" \
00980 "movq %%mm5, %%mm2 \n\t" \
00981 "pmaddwd 104(%2), %%mm3 \n\t" \
00982 "paddd %%mm7, %%mm4 \n\t" \
00983 "paddd %%mm4, %%mm2 \n\t" \
00984 "psubd %%mm4, %%mm5 \n\t" \
00985 "psrad $" #shift ", %%mm2 \n\t"\
00986 "psrad $" #shift ", %%mm5 \n\t"\
00987 "movq %%mm6, %%mm4 \n\t" \
00988 "paddd %%mm0, %%mm3 \n\t" \
00989 "paddd %%mm3, %%mm6 \n\t" \
00990 "psubd %%mm3, %%mm4 \n\t" \
00991 "psrad $" #shift ", %%mm6 \n\t"\
00992 "psrad $" #shift ", %%mm4 \n\t"\
00993 "packssdw %%mm2, %%mm2 \n\t" \
00994 "packssdw %%mm6, %%mm6 \n\t" \
00995 "movd %%mm2, 32+" #dst " \n\t"\
00996 "packssdw %%mm4, %%mm4 \n\t" \
00997 "packssdw %%mm5, %%mm5 \n\t" \
00998 "movd %%mm6, 48+" #dst " \n\t"\
00999 "movd %%mm4, 64+" #dst " \n\t"\
01000 "movd %%mm5, 80+" #dst " \n\t"
01001
01002
01003 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
01004 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
01005 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
01006 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
01007 "jmp 9f \n\t"
01008
01009 "#" ASMALIGN(4) \
01010 "3: \n\t"
01011 #undef IDCT
01012 #define IDCT(src0, src4, src1, src5, dst, shift) \
01013 "movq " #src0 ", %%mm0 \n\t" \
01014 "movq " #src1 ", %%mm2 \n\t" \
01015 "movq 16(%2), %%mm4 \n\t" \
01016 "pmaddwd %%mm0, %%mm4 \n\t" \
01017 "movq 24(%2), %%mm5 \n\t" \
01018 "pmaddwd %%mm5, %%mm0 \n\t" \
01019 "movq %%mm4, %%mm6 \n\t" \
01020 "movq 48(%2), %%mm7 \n\t" \
01021 "pmaddwd %%mm2, %%mm7 \n\t" \
01022 "movq %%mm0, %%mm5 \n\t" \
01023 "movq 64(%2), %%mm3 \n\t"\
01024 "pmaddwd %%mm2, %%mm3 \n\t" \
01025 "paddd %%mm4, %%mm7 \n\t" \
01026 "paddd %%mm4, %%mm4 \n\t" \
01027 "psubd %%mm7, %%mm4 \n\t" \
01028 "psrad $" #shift ", %%mm7 \n\t"\
01029 "psrad $" #shift ", %%mm4 \n\t"\
01030 "movq %%mm0, %%mm1 \n\t" \
01031 "paddd %%mm3, %%mm0 \n\t" \
01032 "psubd %%mm3, %%mm1 \n\t" \
01033 "psrad $" #shift ", %%mm0 \n\t"\
01034 "psrad $" #shift ", %%mm1 \n\t"\
01035 "packssdw %%mm7, %%mm7 \n\t" \
01036 "movd %%mm7, " #dst " \n\t"\
01037 "packssdw %%mm0, %%mm0 \n\t" \
01038 "movd %%mm0, 16+" #dst " \n\t"\
01039 "packssdw %%mm1, %%mm1 \n\t" \
01040 "movd %%mm1, 96+" #dst " \n\t"\
01041 "packssdw %%mm4, %%mm4 \n\t" \
01042 "movd %%mm4, 112+" #dst " \n\t"\
01043 "movq 80(%2), %%mm4 \n\t" \
01044 "pmaddwd %%mm2, %%mm4 \n\t" \
01045 "pmaddwd 96(%2), %%mm2 \n\t" \
01046 "movq %%mm5, %%mm1 \n\t" \
01047 "paddd %%mm4, %%mm1 \n\t" \
01048 "psubd %%mm4, %%mm5 \n\t" \
01049 "psrad $" #shift ", %%mm1 \n\t"\
01050 "psrad $" #shift ", %%mm5 \n\t"\
01051 "movq %%mm6, %%mm4 \n\t" \
01052 "paddd %%mm2, %%mm6 \n\t" \
01053 "psubd %%mm2, %%mm4 \n\t" \
01054 "psrad $" #shift ", %%mm6 \n\t"\
01055 "psrad $" #shift ", %%mm4 \n\t"\
01056 "packssdw %%mm1, %%mm1 \n\t" \
01057 "packssdw %%mm6, %%mm6 \n\t" \
01058 "movd %%mm1, 32+" #dst " \n\t"\
01059 "packssdw %%mm4, %%mm4 \n\t" \
01060 "packssdw %%mm5, %%mm5 \n\t" \
01061 "movd %%mm6, 48+" #dst " \n\t"\
01062 "movd %%mm4, 64+" #dst " \n\t"\
01063 "movd %%mm5, 80+" #dst " \n\t"
01064
01065
01066
01067 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
01068 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
01069 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
01070 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
01071 "jmp 9f \n\t"
01072
01073 "#" ASMALIGN(4) \
01074 "5: \n\t"
01075 #undef IDCT
01076 #define IDCT(src0, src4, src1, src5, dst, shift) \
01077 "movq " #src0 ", %%mm0 \n\t" \
01078 "movq " #src4 ", %%mm1 \n\t" \
01079 "movq 16(%2), %%mm4 \n\t" \
01080 "pmaddwd %%mm0, %%mm4 \n\t" \
01081 "movq 24(%2), %%mm5 \n\t" \
01082 "pmaddwd %%mm5, %%mm0 \n\t" \
01083 "movq 32(%2), %%mm5 \n\t" \
01084 "pmaddwd %%mm1, %%mm5 \n\t" \
01085 "movq 40(%2), %%mm6 \n\t" \
01086 "pmaddwd %%mm6, %%mm1 \n\t" \
01087 "movq %%mm4, %%mm6 \n\t" \
01088 "paddd %%mm5, %%mm4 \n\t" \
01089 "psubd %%mm5, %%mm6 \n\t" \
01090 "movq %%mm0, %%mm5 \n\t" \
01091 "paddd %%mm1, %%mm0 \n\t" \
01092 "psubd %%mm1, %%mm5 \n\t" \
01093 "movq 8+" #src0 ", %%mm2 \n\t" \
01094 "movq 8+" #src4 ", %%mm3 \n\t" \
01095 "movq 16(%2), %%mm1 \n\t" \
01096 "pmaddwd %%mm2, %%mm1 \n\t" \
01097 "movq 24(%2), %%mm7 \n\t" \
01098 "pmaddwd %%mm7, %%mm2 \n\t" \
01099 "movq 32(%2), %%mm7 \n\t" \
01100 "pmaddwd %%mm3, %%mm7 \n\t" \
01101 "pmaddwd 40(%2), %%mm3 \n\t" \
01102 "paddd %%mm1, %%mm7 \n\t" \
01103 "paddd %%mm1, %%mm1 \n\t" \
01104 "psubd %%mm7, %%mm1 \n\t" \
01105 "paddd %%mm2, %%mm3 \n\t" \
01106 "paddd %%mm2, %%mm2 \n\t" \
01107 "psubd %%mm3, %%mm2 \n\t" \
01108 "psrad $" #shift ", %%mm4 \n\t"\
01109 "psrad $" #shift ", %%mm7 \n\t"\
01110 "psrad $" #shift ", %%mm3 \n\t"\
01111 "packssdw %%mm7, %%mm4 \n\t" \
01112 "movq %%mm4, " #dst " \n\t"\
01113 "psrad $" #shift ", %%mm0 \n\t"\
01114 "packssdw %%mm3, %%mm0 \n\t" \
01115 "movq %%mm0, 16+" #dst " \n\t"\
01116 "movq %%mm0, 96+" #dst " \n\t"\
01117 "movq %%mm4, 112+" #dst " \n\t"\
01118 "psrad $" #shift ", %%mm5 \n\t"\
01119 "psrad $" #shift ", %%mm6 \n\t"\
01120 "psrad $" #shift ", %%mm2 \n\t"\
01121 "packssdw %%mm2, %%mm5 \n\t" \
01122 "movq %%mm5, 32+" #dst " \n\t"\
01123 "psrad $" #shift ", %%mm1 \n\t"\
01124 "packssdw %%mm1, %%mm6 \n\t" \
01125 "movq %%mm6, 48+" #dst " \n\t"\
01126 "movq %%mm6, 64+" #dst " \n\t"\
01127 "movq %%mm5, 80+" #dst " \n\t"
01128
01129
01130
01131 IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
01132
01133 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
01134
01135 "jmp 9f \n\t"
01136
01137
01138 "#" ASMALIGN(4) \
01139 "1: \n\t"
01140 #undef IDCT
01141 #define IDCT(src0, src4, src1, src5, dst, shift) \
01142 "movq " #src0 ", %%mm0 \n\t" \
01143 "movq " #src4 ", %%mm1 \n\t" \
01144 "movq " #src1 ", %%mm2 \n\t" \
01145 "movq 16(%2), %%mm4 \n\t" \
01146 "pmaddwd %%mm0, %%mm4 \n\t" \
01147 "movq 24(%2), %%mm5 \n\t" \
01148 "pmaddwd %%mm5, %%mm0 \n\t" \
01149 "movq 32(%2), %%mm5 \n\t" \
01150 "pmaddwd %%mm1, %%mm5 \n\t" \
01151 "movq 40(%2), %%mm6 \n\t" \
01152 "pmaddwd %%mm6, %%mm1 \n\t" \
01153 "movq %%mm4, %%mm6 \n\t" \
01154 "movq 48(%2), %%mm7 \n\t" \
01155 "pmaddwd %%mm2, %%mm7 \n\t" \
01156 "paddd %%mm5, %%mm4 \n\t" \
01157 "psubd %%mm5, %%mm6 \n\t" \
01158 "movq %%mm0, %%mm5 \n\t" \
01159 "paddd %%mm1, %%mm0 \n\t" \
01160 "psubd %%mm1, %%mm5 \n\t" \
01161 "movq 64(%2), %%mm1 \n\t"\
01162 "pmaddwd %%mm2, %%mm1 \n\t" \
01163 "paddd %%mm4, %%mm7 \n\t" \
01164 "paddd %%mm4, %%mm4 \n\t" \
01165 "psubd %%mm7, %%mm4 \n\t" \
01166 "psrad $" #shift ", %%mm7 \n\t"\
01167 "psrad $" #shift ", %%mm4 \n\t"\
01168 "movq %%mm0, %%mm3 \n\t" \
01169 "paddd %%mm1, %%mm0 \n\t" \
01170 "psubd %%mm1, %%mm3 \n\t" \
01171 "psrad $" #shift ", %%mm0 \n\t"\
01172 "psrad $" #shift ", %%mm3 \n\t"\
01173 "packssdw %%mm7, %%mm7 \n\t" \
01174 "movd %%mm7, " #dst " \n\t"\
01175 "packssdw %%mm0, %%mm0 \n\t" \
01176 "movd %%mm0, 16+" #dst " \n\t"\
01177 "packssdw %%mm3, %%mm3 \n\t" \
01178 "movd %%mm3, 96+" #dst " \n\t"\
01179 "packssdw %%mm4, %%mm4 \n\t" \
01180 "movd %%mm4, 112+" #dst " \n\t"\
01181 "movq 80(%2), %%mm4 \n\t" \
01182 "pmaddwd %%mm2, %%mm4 \n\t" \
01183 "pmaddwd 96(%2), %%mm2 \n\t" \
01184 "movq %%mm5, %%mm3 \n\t" \
01185 "paddd %%mm4, %%mm3 \n\t" \
01186 "psubd %%mm4, %%mm5 \n\t" \
01187 "psrad $" #shift ", %%mm3 \n\t"\
01188 "psrad $" #shift ", %%mm5 \n\t"\
01189 "movq %%mm6, %%mm4 \n\t" \
01190 "paddd %%mm2, %%mm6 \n\t" \
01191 "psubd %%mm2, %%mm4 \n\t" \
01192 "psrad $" #shift ", %%mm6 \n\t"\
01193 "packssdw %%mm3, %%mm3 \n\t" \
01194 "movd %%mm3, 32+" #dst " \n\t"\
01195 "psrad $" #shift ", %%mm4 \n\t"\
01196 "packssdw %%mm6, %%mm6 \n\t" \
01197 "movd %%mm6, 48+" #dst " \n\t"\
01198 "packssdw %%mm4, %%mm4 \n\t" \
01199 "packssdw %%mm5, %%mm5 \n\t" \
01200 "movd %%mm4, 64+" #dst " \n\t"\
01201 "movd %%mm5, 80+" #dst " \n\t"
01202
01203
01204
01205 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
01206 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
01207 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
01208 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
01209 "jmp 9f \n\t"
01210
01211
01212 "#" ASMALIGN(4)
01213 "7: \n\t"
01214 #undef IDCT
01215 #define IDCT(src0, src4, src1, src5, dst, shift) \
01216 "movq " #src0 ", %%mm0 \n\t" \
01217 "movq 16(%2), %%mm4 \n\t" \
01218 "pmaddwd %%mm0, %%mm4 \n\t" \
01219 "movq 24(%2), %%mm5 \n\t" \
01220 "pmaddwd %%mm5, %%mm0 \n\t" \
01221 "psrad $" #shift ", %%mm4 \n\t"\
01222 "psrad $" #shift ", %%mm0 \n\t"\
01223 "movq 8+" #src0 ", %%mm2 \n\t" \
01224 "movq 16(%2), %%mm1 \n\t" \
01225 "pmaddwd %%mm2, %%mm1 \n\t" \
01226 "movq 24(%2), %%mm7 \n\t" \
01227 "pmaddwd %%mm7, %%mm2 \n\t" \
01228 "movq 32(%2), %%mm7 \n\t" \
01229 "psrad $" #shift ", %%mm1 \n\t"\
01230 "packssdw %%mm1, %%mm4 \n\t" \
01231 "movq %%mm4, " #dst " \n\t"\
01232 "psrad $" #shift ", %%mm2 \n\t"\
01233 "packssdw %%mm2, %%mm0 \n\t" \
01234 "movq %%mm0, 16+" #dst " \n\t"\
01235 "movq %%mm0, 96+" #dst " \n\t"\
01236 "movq %%mm4, 112+" #dst " \n\t"\
01237 "movq %%mm0, 32+" #dst " \n\t"\
01238 "movq %%mm4, 48+" #dst " \n\t"\
01239 "movq %%mm4, 64+" #dst " \n\t"\
01240 "movq %%mm0, 80+" #dst " \n\t"
01241
01242
01243 IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
01244
01245 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
01246
01247
01248
01249 #endif
01250
01251
01252
01253
01254
01255
01256
01257
01258
01259
01260
01261
01262
01263
01264
01265
01266
01267
01268
01269
01270
01271
01272
01273 "9: \n\t"
01274 :: "r" (block), "r" (temp), "r" (coeffs)
01275 : "%eax"
01276 );
01277 }
01278
/**
 * Public entry point that runs the file-local idct() routine on a
 * coefficient block.
 *
 * The visible part of idct() stores its results back through the same
 * pointer it receives (the final passes write up to byte offset 127 from
 * block), so the buffer is overwritten and must hold at least 64 int16_t
 * values.
 *
 * @param block coefficient buffer handed unchanged to idct()
 */
01279 void ff_simple_idct_mmx(int16_t *block)
01280 {
01281 idct(block);
01282 }
01283
01284
01285
/**
 * Runs idct() on block, then passes the block to put_pixels_clamped_mmx()
 * together with the destination pointer and line size.
 *
 * NOTE(review): put_pixels_clamped_mmx() is defined outside this file;
 * presumably it clamps each transformed value to the 8-bit pixel range and
 * writes it to dest using line_size as the row stride -- confirm against
 * its definition.
 *
 * @param dest      destination pointer forwarded to put_pixels_clamped_mmx()
 * @param line_size forwarded as-is; presumably the byte stride between rows
 *                  of dest -- TODO confirm
 * @param block     coefficient buffer; overwritten by idct() before it is
 *                  handed to put_pixels_clamped_mmx()
 */
01286 void ff_simple_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block)
01287 {
/* Transform first: the second call receives the already-processed block. */
01288 idct(block);
01289 put_pixels_clamped_mmx(block, dest, line_size);
01290 }
/**
 * Runs idct() on block, then passes the block to add_pixels_clamped_mmx()
 * together with the destination pointer and line size.
 *
 * NOTE(review): add_pixels_clamped_mmx() is defined outside this file;
 * presumably it adds each transformed value to the pixel already present in
 * dest and clamps the sum to the 8-bit range -- confirm against its
 * definition.
 *
 * @param dest      destination pointer forwarded to add_pixels_clamped_mmx()
 * @param line_size forwarded as-is; presumably the byte stride between rows
 *                  of dest -- TODO confirm
 * @param block     coefficient buffer; overwritten by idct() before it is
 *                  handed to add_pixels_clamped_mmx()
 */
01291 void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block)
01292 {
/* Transform first: the second call receives the already-processed block. */
01293 idct(block);
01294 add_pixels_clamped_mmx(block, dest, line_size);
01295 }