00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #include "libavutil/common.h"
00034 #include "libavutil/x86_cpu.h"
00035 #include "libavcodec/dsputil.h"
00036
00038
00039
00040
00041
00042
00043
00044
00045
00047
/*
 * Fixed-point scaling parameters used by the transform code in this file.
 *
 * SHIFT_FRW_COL is the left shift applied to the input samples in the
 * column pass (FDCT_COL), SHIFT_FRW_ROW is the arithmetic right shift
 * applied to the 32-bit row-pass sums, and RND_FRW_ROW is half of
 * (1 << SHIFT_FRW_ROW), added to those sums before the shift so that the
 * result is rounded rather than truncated.
 * With BITS_FRW_ACC == 3 this gives SHIFT_FRW_ROW == 17 and
 * RND_FRW_ROW == 65536.
 */
00048 #define BITS_FRW_ACC 3 //; 2 or 3 for accuracy
00049 #define SHIFT_FRW_COL BITS_FRW_ACC
00050 #define SHIFT_FRW_ROW (BITS_FRW_ACC + 17 - 3)
00051 #define RND_FRW_ROW (1 << (SHIFT_FRW_ROW-1))
00052
00053
/* Expands to eight comma-separated copies of x; used below to fill all
 * eight 16-bit lanes of a vector constant with the same value. */
00054 #define X8(x) x,x,x,x,x,x,x,x
00055
00056
/*
 * Column-pass multipliers, each replicated across eight 16-bit lanes.
 * FDCT_COL reads them with pmulhw at byte offsets 0, 16 and 32.
 * Numerically the three values match tan(pi/16), tan(2*pi/16) and
 * tan(3*pi/16) - 1, each scaled by 2^16.
 */
00057 DECLARE_ALIGNED(16, static const int16_t, fdct_tg_all_16)[24] = {
00058 X8(13036),
00059 X8(27146),
00060 X8(-21746)
00061 };
00062
/*
 * Column-pass multiplier replicated across eight 16-bit lanes; FDCT_COL
 * applies it with pmulhw to values that were shifted left by
 * SHIFT_FRW_COL + 1. Numerically 23170 matches cos(pi/4) scaled by 2^15.
 */
00063 DECLARE_ALIGNED(16, static const int16_t, ocos_4_16)[8] = {
00064 X8(23170)
00065 };
00066
/* Eight 16-bit ones; FDCT_COL ORs this vector (por) into several of its
 * results, which forces the least significant bit of each lane to 1. */
00067 DECLARE_ALIGNED(16, static const int16_t, fdct_one_corr)[8] = { X8(1) };
00068
/* Row-pass rounding constant for the 64-bit (MMX) row functions: two
 * 32-bit copies of RND_FRW_ROW, added to each sum before the right shift. */
00069 DECLARE_ALIGNED(8, static const int32_t, fdct_r_row)[2] = {RND_FRW_ROW, RND_FRW_ROW };
00070
/*
 * Row-pass rounding constant for fdct_row_sse2(): four 32-bit copies of
 * RND_FRW_ROW, loaded once into xmm6 with movdqa.
 * NOTE(review): the reason the array is wrapped in an anonymous struct is
 * not visible in this file.
 */
00071 static struct
00072 {
00073 DECLARE_ALIGNED(16, const int32_t, fdct_r_row_sse2)[4];
00074 } fdct_r_row_sse2 =
00075 {{
00076 RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW
00077 }};
00078
00079
/*
 * Row-pass coefficient table for the MMX/MMX2 row functions.
 *
 * It holds eight groups of 32 int16_t values, one group per row of the
 * 8x8 block: ff_fdct_mmx() and ff_fdct_mmx2() start at the first group and
 * advance the table pointer by 32 entries after each row.
 * The groups for rows 4, 5, 6 and 7 repeat those of rows 0, 3, 2 and 1
 * respectively.
 */
00080 DECLARE_ALIGNED(8, static const int16_t, tab_frw_01234567)[] = {
/* row 0 */
00081 16384, 16384, 22725, 19266,
00082 16384, 16384, 12873, 4520,
00083 21407, 8867, 19266, -4520,
00084 -8867, -21407, -22725, -12873,
00085 16384, -16384, 12873, -22725,
00086 -16384, 16384, 4520, 19266,
00087 8867, -21407, 4520, -12873,
00088 21407, -8867, 19266, -22725,
00089
/* row 1 */
00090 22725, 22725, 31521, 26722,
00091 22725, 22725, 17855, 6270,
00092 29692, 12299, 26722, -6270,
00093 -12299, -29692, -31521, -17855,
00094 22725, -22725, 17855, -31521,
00095 -22725, 22725, 6270, 26722,
00096 12299, -29692, 6270, -17855,
00097 29692, -12299, 26722, -31521,
00098
/* row 2 */
00099 21407, 21407, 29692, 25172,
00100 21407, 21407, 16819, 5906,
00101 27969, 11585, 25172, -5906,
00102 -11585, -27969, -29692, -16819,
00103 21407, -21407, 16819, -29692,
00104 -21407, 21407, 5906, 25172,
00105 11585, -27969, 5906, -16819,
00106 27969, -11585, 25172, -29692,
00107
/* row 3 */
00108 19266, 19266, 26722, 22654,
00109 19266, 19266, 15137, 5315,
00110 25172, 10426, 22654, -5315,
00111 -10426, -25172, -26722, -15137,
00112 19266, -19266, 15137, -26722,
00113 -19266, 19266, 5315, 22654,
00114 10426, -25172, 5315, -15137,
00115 25172, -10426, 22654, -26722,
00116
/* row 4 (same values as row 0) */
00117 16384, 16384, 22725, 19266,
00118 16384, 16384, 12873, 4520,
00119 21407, 8867, 19266, -4520,
00120 -8867, -21407, -22725, -12873,
00121 16384, -16384, 12873, -22725,
00122 -16384, 16384, 4520, 19266,
00123 8867, -21407, 4520, -12873,
00124 21407, -8867, 19266, -22725,
00125
/* row 5 (same values as row 3) */
00126 19266, 19266, 26722, 22654,
00127 19266, 19266, 15137, 5315,
00128 25172, 10426, 22654, -5315,
00129 -10426, -25172, -26722, -15137,
00130 19266, -19266, 15137, -26722,
00131 -19266, 19266, 5315, 22654,
00132 10426, -25172, 5315, -15137,
00133 25172, -10426, 22654, -26722,
00134
/* row 6 (same values as row 2) */
00135 21407, 21407, 29692, 25172,
00136 21407, 21407, 16819, 5906,
00137 27969, 11585, 25172, -5906,
00138 -11585, -27969, -29692, -16819,
00139 21407, -21407, 16819, -29692,
00140 -21407, 21407, 5906, 25172,
00141 11585, -27969, 5906, -16819,
00142 27969, -11585, 25172, -29692,
00143
/* row 7 (same values as row 1) */
00144 22725, 22725, 31521, 26722,
00145 22725, 22725, 17855, 6270,
00146 29692, 12299, 26722, -6270,
00147 -12299, -29692, -31521, -17855,
00148 22725, -22725, 17855, -31521,
00149 -22725, 22725, 6270, 26722,
00150 12299, -29692, 6270, -17855,
00151 29692, -12299, 26722, -31521,
00152 };
00153
/*
 * Row-pass coefficient table for fdct_row_sse2().
 *
 * TABLE_SSE2 emits one group of 32 int16_t values built from the constants
 * C1..C7, which are redefined before each of the eight expansions. The
 * constant sets are the same as those of the corresponding groups of
 * tab_frw_01234567; only the element order differs.
 * TABLE_SSE2 ends with a trailing comma so that consecutive expansions form
 * one initializer list.
 *
 * fdct_row_sse2() in this file only addresses byte offsets 0..255 of this
 * table, i.e. the first four groups.
 */
00154 static struct
00155 {
00156 DECLARE_ALIGNED(16, const int16_t, tab_frw_01234567_sse2)[256];
00157 } tab_frw_01234567_sse2 =
00158 {{
00159
00160 #define TABLE_SSE2 C4, C4, C1, C3, -C6, -C2, -C1, -C5, \
00161 C4, C4, C5, C7, C2, C6, C3, -C7, \
00162 -C4, C4, C7, C3, C6, -C2, C7, -C5, \
00163 C4, -C4, C5, -C1, C2, -C6, C3, -C1,
00164
/* group 0 */
00165 #define C1 22725
00166 #define C2 21407
00167 #define C3 19266
00168 #define C4 16384
00169 #define C5 12873
00170 #define C6 8867
00171 #define C7 4520
00172 TABLE_SSE2
00173
/* group 1 */
00174 #undef C1
00175 #undef C2
00176 #undef C3
00177 #undef C4
00178 #undef C5
00179 #undef C6
00180 #undef C7
00181 #define C1 31521
00182 #define C2 29692
00183 #define C3 26722
00184 #define C4 22725
00185 #define C5 17855
00186 #define C6 12299
00187 #define C7 6270
00188 TABLE_SSE2
00189
/* group 2 */
00190 #undef C1
00191 #undef C2
00192 #undef C3
00193 #undef C4
00194 #undef C5
00195 #undef C6
00196 #undef C7
00197 #define C1 29692
00198 #define C2 27969
00199 #define C3 25172
00200 #define C4 21407
00201 #define C5 16819
00202 #define C6 11585
00203 #define C7 5906
00204 TABLE_SSE2
00205
/* group 3 */
00206 #undef C1
00207 #undef C2
00208 #undef C3
00209 #undef C4
00210 #undef C5
00211 #undef C6
00212 #undef C7
00213 #define C1 26722
00214 #define C2 25172
00215 #define C3 22654
00216 #define C4 19266
00217 #define C5 15137
00218 #define C6 10426
00219 #define C7 5315
00220 TABLE_SSE2
00221
/* group 4 (same constants as group 0) */
00222 #undef C1
00223 #undef C2
00224 #undef C3
00225 #undef C4
00226 #undef C5
00227 #undef C6
00228 #undef C7
00229 #define C1 22725
00230 #define C2 21407
00231 #define C3 19266
00232 #define C4 16384
00233 #define C5 12873
00234 #define C6 8867
00235 #define C7 4520
00236 TABLE_SSE2
00237
/* group 5 (same constants as group 3) */
00238 #undef C1
00239 #undef C2
00240 #undef C3
00241 #undef C4
00242 #undef C5
00243 #undef C6
00244 #undef C7
00245 #define C1 26722
00246 #define C2 25172
00247 #define C3 22654
00248 #define C4 19266
00249 #define C5 15137
00250 #define C6 10426
00251 #define C7 5315
00252 TABLE_SSE2
00253
/* group 6 (same constants as group 2) */
00254 #undef C1
00255 #undef C2
00256 #undef C3
00257 #undef C4
00258 #undef C5
00259 #undef C6
00260 #undef C7
00261 #define C1 29692
00262 #define C2 27969
00263 #define C3 25172
00264 #define C4 21407
00265 #define C5 16819
00266 #define C6 11585
00267 #define C7 5906
00268 TABLE_SSE2
00269
/* group 7 (same constants as group 1) */
00270 #undef C1
00271 #undef C2
00272 #undef C3
00273 #undef C4
00274 #undef C5
00275 #undef C6
00276 #undef C7
00277 #define C1 31521
00278 #define C2 29692
00279 #define C3 26722
00280 #define C4 22725
00281 #define C5 17855
00282 #define C6 12299
00283 #define C7 6270
00284 TABLE_SSE2
00285 }};
00286
/* Short alias for AV_TOSTRING (defined elsewhere); used below to splice the
 * SHIFT_FRW_* values into the inline-asm template strings as immediates. */
00287 #define S(s) AV_TOSTRING(s) //AV_STRINGIFY is too long
00288
/*
 * FDCT_COL(cpu, mm, mov) defines
 *     static void fdct_col_<cpu>(const int16_t *in, int16_t *out, int offset)
 * which runs the column pass of the transform as one inline-asm statement.
 *
 *   cpu  suffix of the generated function name
 *   mm   register-name prefix (registers <mm>0 .. <mm>7 are used)
 *   mov  mnemonic used for full-register loads, stores and copies
 *
 * Asm operands:
 *   %0 = in  + offset   source; eight vectors are read at byte offsets
 *                       0, 16, 32, ..., 112 (one per row of the block)
 *   %1 = fdct_tg_all_16 multipliers at byte offsets 0, 16 and 32
 *   %2 = fdct_one_corr  ORed into several results
 *   %3 = out + offset   destination; eight vectors are written at byte
 *                       offsets 0, 16, 32, ..., 112
 *   %4 = ocos_4_16
 *
 * Inputs are shifted left by SHIFT_FRW_COL (or SHIFT_FRW_COL + 1 before
 * the multiplications by ocos_4_16), and all additions and subtractions
 * use signed saturation (paddsw/psubsw).
 *
 * NOTE(review): the asm statement has empty output and clobber lists even
 * though it stores through %3 and overwrites registers <mm>0..<mm>7;
 * confirm that the build relies only on "volatile" here.
 */
00289 #define FDCT_COL(cpu, mm, mov)\
00290 static av_always_inline void fdct_col_##cpu(const int16_t *in, int16_t *out, int offset)\
00291 {\
00292 __asm__ volatile (\
00293 #mov" 16(%0), %%"#mm"0 \n\t" \
00294 #mov" 96(%0), %%"#mm"1 \n\t" \
00295 #mov" %%"#mm"0, %%"#mm"2 \n\t" \
00296 #mov" 32(%0), %%"#mm"3 \n\t" \
00297 "paddsw %%"#mm"1, %%"#mm"0 \n\t" \
00298 #mov" 80(%0), %%"#mm"4 \n\t" \
00299 "psllw $"S(SHIFT_FRW_COL)", %%"#mm"0 \n\t" \
00300 #mov" (%0), %%"#mm"5 \n\t" \
00301 "paddsw %%"#mm"3, %%"#mm"4 \n\t" \
00302 "paddsw 112(%0), %%"#mm"5 \n\t" \
00303 "psllw $"S(SHIFT_FRW_COL)", %%"#mm"4 \n\t" \
00304 #mov" %%"#mm"0, %%"#mm"6 \n\t" \
00305 "psubsw %%"#mm"1, %%"#mm"2 \n\t" \
00306 #mov" 16(%1), %%"#mm"1 \n\t" \
00307 "psubsw %%"#mm"4, %%"#mm"0 \n\t" \
00308 #mov" 48(%0), %%"#mm"7 \n\t" \
00309 "pmulhw %%"#mm"0, %%"#mm"1 \n\t" \
00310 "paddsw 64(%0), %%"#mm"7 \n\t" \
00311 "psllw $"S(SHIFT_FRW_COL)", %%"#mm"5 \n\t" \
00312 "paddsw %%"#mm"4, %%"#mm"6 \n\t" \
00313 "psllw $"S(SHIFT_FRW_COL)", %%"#mm"7 \n\t" \
00314 #mov" %%"#mm"5, %%"#mm"4 \n\t" \
00315 "psubsw %%"#mm"7, %%"#mm"5 \n\t" \
00316 "paddsw %%"#mm"5, %%"#mm"1 \n\t" \
00317 "paddsw %%"#mm"7, %%"#mm"4 \n\t" \
00318 "por (%2), %%"#mm"1 \n\t" \
00319 "psllw $"S(SHIFT_FRW_COL)"+1, %%"#mm"2 \n\t" \
00320 "pmulhw 16(%1), %%"#mm"5 \n\t" \
00321 #mov" %%"#mm"4, %%"#mm"7 \n\t" \
00322 "psubsw 80(%0), %%"#mm"3 \n\t" \
00323 "psubsw %%"#mm"6, %%"#mm"4 \n\t" \
00324 #mov" %%"#mm"1, 32(%3) \n\t" \
00325 "paddsw %%"#mm"6, %%"#mm"7 \n\t" \
00326 #mov" 48(%0), %%"#mm"1 \n\t" \
00327 "psllw $"S(SHIFT_FRW_COL)"+1, %%"#mm"3 \n\t" \
00328 "psubsw 64(%0), %%"#mm"1 \n\t" \
00329 #mov" %%"#mm"2, %%"#mm"6 \n\t" \
00330 #mov" %%"#mm"4, 64(%3) \n\t" \
00331 "paddsw %%"#mm"3, %%"#mm"2 \n\t" \
00332 "pmulhw (%4), %%"#mm"2 \n\t" \
00333 "psubsw %%"#mm"3, %%"#mm"6 \n\t" \
00334 "pmulhw (%4), %%"#mm"6 \n\t" \
00335 "psubsw %%"#mm"0, %%"#mm"5 \n\t" \
00336 "por (%2), %%"#mm"5 \n\t" \
00337 "psllw $"S(SHIFT_FRW_COL)", %%"#mm"1 \n\t" \
00338 "por (%2), %%"#mm"2 \n\t" \
00339 #mov" %%"#mm"1, %%"#mm"4 \n\t" \
00340 #mov" (%0), %%"#mm"3 \n\t" \
00341 "paddsw %%"#mm"6, %%"#mm"1 \n\t" \
00342 "psubsw 112(%0), %%"#mm"3 \n\t" \
00343 "psubsw %%"#mm"6, %%"#mm"4 \n\t" \
00344 #mov" (%1), %%"#mm"0 \n\t" \
00345 "psllw $"S(SHIFT_FRW_COL)", %%"#mm"3 \n\t" \
00346 #mov" 32(%1), %%"#mm"6 \n\t" \
00347 "pmulhw %%"#mm"1, %%"#mm"0 \n\t" \
00348 #mov" %%"#mm"7, (%3) \n\t" \
00349 "pmulhw %%"#mm"4, %%"#mm"6 \n\t" \
00350 #mov" %%"#mm"5, 96(%3) \n\t" \
00351 #mov" %%"#mm"3, %%"#mm"7 \n\t" \
00352 #mov" 32(%1), %%"#mm"5 \n\t" \
00353 "psubsw %%"#mm"2, %%"#mm"7 \n\t" \
00354 "paddsw %%"#mm"2, %%"#mm"3 \n\t" \
00355 "pmulhw %%"#mm"7, %%"#mm"5 \n\t" \
00356 "paddsw %%"#mm"3, %%"#mm"0 \n\t" \
00357 "paddsw %%"#mm"4, %%"#mm"6 \n\t" \
00358 "pmulhw (%1), %%"#mm"3 \n\t" \
00359 "por (%2), %%"#mm"0 \n\t" \
00360 "paddsw %%"#mm"7, %%"#mm"5 \n\t" \
00361 "psubsw %%"#mm"6, %%"#mm"7 \n\t" \
00362 #mov" %%"#mm"0, 16(%3) \n\t" \
00363 "paddsw %%"#mm"4, %%"#mm"5 \n\t" \
00364 #mov" %%"#mm"7, 48(%3) \n\t" \
00365 "psubsw %%"#mm"1, %%"#mm"3 \n\t" \
00366 #mov" %%"#mm"5, 80(%3) \n\t" \
00367 #mov" %%"#mm"3, 112(%3) \n\t" \
00368 : \
00369 : "r" (in + offset), "r" (fdct_tg_all_16), "r" (fdct_one_corr), \
00370 "r" (out + offset), "r" (ocos_4_16)); \
00371 }
00372
/*
 * Instantiate the column pass twice:
 *   fdct_col_mmx()  - mm registers with movq; ff_fdct_mmx()/ff_fdct_mmx2()
 *                     call it twice, with offsets 0 and 4
 *   fdct_col_sse2() - xmm registers with movdqa; ff_fdct_sse2() calls it
 *                     once, with offset 0
 */
00373 FDCT_COL(mmx, mm, movq)
00374 FDCT_COL(sse2, xmm, movdqa)
00375
/*
 * Row pass for all eight rows of the block, using SSE2.
 *
 * in  : eight rows of eight int16_t, read at byte offsets 0, 16, ..., 112
 * out : eight rows of eight int16_t, written with movdqa at the same
 *       offsets (so out must be 16-byte aligned)
 *
 * Asm operands:
 *   %0 = in
 *   %1 = tab_frw_01234567_sse2 coefficient table
 *   %2 = fdct_r_row_sse2 rounding constant (kept in xmm6)
 *   %3 = SHIFT_FRW_ROW as an immediate
 *   %4 = out
 *
 * Rows are processed in pairs that share a table group, so the second row
 * of each pair only has to reload part of the coefficients.
 *
 * NOTE(review): the clobber list names only xmm registers although the
 * statement stores through %4; confirm that no "memory" clobber is needed.
 */
00376 static av_always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
00377 {
00378 __asm__ volatile(
/* H1: load the two 8-byte halves of the row at byte offset i into
 * xmm2/xmm0 and all four coefficient vectors of the table group at byte
 * offset t into xmm3, xmm7, xmm4 and xmm5. */
00379 #define FDCT_ROW_SSE2_H1(i,t) \
00380 "movq " #i "(%0), %%xmm2 \n\t" \
00381 "movq " #i "+8(%0), %%xmm0 \n\t" \
00382 "movdqa " #t "+32(%1), %%xmm3 \n\t" \
00383 "movdqa " #t "+48(%1), %%xmm7 \n\t" \
00384 "movdqa " #t "(%1), %%xmm4 \n\t" \
00385 "movdqa " #t "+16(%1), %%xmm5 \n\t"
00386
/* H2: like H1 but reloads only xmm3 and xmm7. FDCT_ROW_SSE2 overwrites
 * those two as pmaddwd destinations, whereas xmm4 and xmm5 are only used
 * as sources and still hold the coefficients loaded by the preceding H1. */
00387 #define FDCT_ROW_SSE2_H2(i,t) \
00388 "movq " #i "(%0), %%xmm2 \n\t" \
00389 "movq " #i "+8(%0), %%xmm0 \n\t" \
00390 "movdqa " #t "+32(%1), %%xmm3 \n\t" \
00391 "movdqa " #t "+48(%1), %%xmm7 \n\t"
00392
/* Transform the row loaded by H1/H2: reverse the second half (pshuflw $27),
 * form saturated sums and differences with the first half, multiply-add
 * against the coefficients, add the rounding constant from xmm6, shift
 * right by %3, pack to int16_t with signed saturation and store 16 bytes
 * at byte offset i of out. */
00393 #define FDCT_ROW_SSE2(i) \
00394 "movq %%xmm2, %%xmm1 \n\t" \
00395 "pshuflw $27, %%xmm0, %%xmm0 \n\t" \
00396 "paddsw %%xmm0, %%xmm1 \n\t" \
00397 "psubsw %%xmm0, %%xmm2 \n\t" \
00398 "punpckldq %%xmm2, %%xmm1 \n\t" \
00399 "pshufd $78, %%xmm1, %%xmm2 \n\t" \
00400 "pmaddwd %%xmm2, %%xmm3 \n\t" \
00401 "pmaddwd %%xmm1, %%xmm7 \n\t" \
00402 "pmaddwd %%xmm5, %%xmm2 \n\t" \
00403 "pmaddwd %%xmm4, %%xmm1 \n\t" \
00404 "paddd %%xmm7, %%xmm3 \n\t" \
00405 "paddd %%xmm2, %%xmm1 \n\t" \
00406 "paddd %%xmm6, %%xmm3 \n\t" \
00407 "paddd %%xmm6, %%xmm1 \n\t" \
00408 "psrad %3, %%xmm3 \n\t" \
00409 "psrad %3, %%xmm1 \n\t" \
00410 "packssdw %%xmm3, %%xmm1 \n\t" \
00411 "movdqa %%xmm1, " #i "(%4) \n\t"
00412
/* xmm6 = rounding constant, kept for all eight rows */
00413 "movdqa (%2), %%xmm6 \n\t"
/* rows 0 and 4 (byte offsets 0 and 64), table group at byte offset 0 */
00414 FDCT_ROW_SSE2_H1(0,0)
00415 FDCT_ROW_SSE2(0)
00416 FDCT_ROW_SSE2_H2(64,0)
00417 FDCT_ROW_SSE2(64)
00418
/* rows 1 and 7 (byte offsets 16 and 112), table group at byte offset 64 */
00419 FDCT_ROW_SSE2_H1(16,64)
00420 FDCT_ROW_SSE2(16)
00421 FDCT_ROW_SSE2_H2(112,64)
00422 FDCT_ROW_SSE2(112)
00423
/* rows 2 and 6 (byte offsets 32 and 96), table group at byte offset 128 */
00424 FDCT_ROW_SSE2_H1(32,128)
00425 FDCT_ROW_SSE2(32)
00426 FDCT_ROW_SSE2_H2(96,128)
00427 FDCT_ROW_SSE2(96)
00428
/* rows 3 and 5 (byte offsets 48 and 80), table group at byte offset 192 */
00429 FDCT_ROW_SSE2_H1(48,192)
00430 FDCT_ROW_SSE2(48)
00431 FDCT_ROW_SSE2_H2(80,192)
00432 FDCT_ROW_SSE2(80)
00433 :
00434 : "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2),
00435 "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out)
00436 XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3",
00437 "%xmm4", "%xmm5", "%xmm6", "%xmm7")
00438 );
00439 }
00440
/*
 * Row pass for a single row of eight int16_t values, using pshufw to
 * reverse the second half of the row.
 *
 * in    : the row to transform (16 bytes are read)
 * out   : receives the eight results (16 bytes are written)
 * table : 32 int16_t coefficients for this row (64 bytes are read)
 *
 * Asm operands: %0 = in, %1 = table, %2 = fdct_r_row, %3 = out.
 *
 * Steps: build saturated sums and differences of the first half and the
 * word-reversed second half, interleave them, run eight pmaddwd against
 * the table, add the partial sums pairwise, add the rounding constant,
 * shift right by SHIFT_FRW_ROW and pack to int16_t with signed saturation.
 *
 * NOTE(review): the asm statement has empty output and clobber lists even
 * though it stores through %3 and overwrites mm0..mm7.
 */
00441 static av_always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table)
00442 {
00443 __asm__ volatile (
00444 "pshufw $0x1B, 8(%0), %%mm5 \n\t"
00445 "movq (%0), %%mm0 \n\t"
00446 "movq %%mm0, %%mm1 \n\t"
00447 "paddsw %%mm5, %%mm0 \n\t"
00448 "psubsw %%mm5, %%mm1 \n\t"
00449 "movq %%mm0, %%mm2 \n\t"
00450 "punpckldq %%mm1, %%mm0 \n\t"
00451 "punpckhdq %%mm1, %%mm2 \n\t"
00452 "movq (%1), %%mm1 \n\t"
00453 "movq 8(%1), %%mm3 \n\t"
00454 "movq 16(%1), %%mm4 \n\t"
00455 "movq 24(%1), %%mm5 \n\t"
00456 "movq 32(%1), %%mm6 \n\t"
00457 "movq 40(%1), %%mm7 \n\t"
00458 "pmaddwd %%mm0, %%mm1 \n\t"
00459 "pmaddwd %%mm2, %%mm3 \n\t"
00460 "pmaddwd %%mm0, %%mm4 \n\t"
00461 "pmaddwd %%mm2, %%mm5 \n\t"
00462 "pmaddwd %%mm0, %%mm6 \n\t"
00463 "pmaddwd %%mm2, %%mm7 \n\t"
00464 "pmaddwd 48(%1), %%mm0 \n\t"
00465 "pmaddwd 56(%1), %%mm2 \n\t"
00466 "paddd %%mm1, %%mm3 \n\t"
00467 "paddd %%mm4, %%mm5 \n\t"
00468 "paddd %%mm6, %%mm7 \n\t"
00469 "paddd %%mm0, %%mm2 \n\t"
00470 "movq (%2), %%mm0 \n\t"
00471 "paddd %%mm0, %%mm3 \n\t"
00472 "paddd %%mm0, %%mm5 \n\t"
00473 "paddd %%mm0, %%mm7 \n\t"
00474 "paddd %%mm0, %%mm2 \n\t"
00475 "psrad $"S(SHIFT_FRW_ROW)", %%mm3 \n\t"
00476 "psrad $"S(SHIFT_FRW_ROW)", %%mm5 \n\t"
00477 "psrad $"S(SHIFT_FRW_ROW)", %%mm7 \n\t"
00478 "psrad $"S(SHIFT_FRW_ROW)", %%mm2 \n\t"
00479 "packssdw %%mm5, %%mm3 \n\t"
00480 "packssdw %%mm2, %%mm7 \n\t"
00481 "movq %%mm3, (%3) \n\t"
00482 "movq %%mm7, 8(%3) \n\t"
00483 :
00484 : "r" (in), "r" (table), "r" (fdct_r_row), "r" (out));
00485 }
00486
/*
 * Row pass for a single row of eight int16_t values, using only
 * movd/punpcklwd/psrlq to reverse the second half of the row (the first
 * six instructions leave words 7, 6, 5, 4 of the row in mm1, where
 * fdct_row_mmx2() uses a single pshufw instead). The remaining steps are
 * the same as in fdct_row_mmx2().
 *
 * in    : the row to transform (16 bytes are read)
 * out   : receives the eight results (16 bytes are written)
 * table : 32 int16_t coefficients for this row (64 bytes are read)
 *
 * Asm operands: %0 = in, %1 = table, %2 = fdct_r_row, %3 = out.
 *
 * NOTE(review): the asm statement has empty output and clobber lists even
 * though it stores through %3 and overwrites mm0..mm7.
 */
00487 static av_always_inline void fdct_row_mmx(const int16_t *in, int16_t *out, const int16_t *table)
00488 {
00489
00490 __asm__ volatile(
00491 "movd 12(%0), %%mm1 \n\t"
00492 "punpcklwd 8(%0), %%mm1 \n\t"
00493 "movq %%mm1, %%mm2 \n\t"
00494 "psrlq $0x20, %%mm1 \n\t"
00495 "movq 0(%0), %%mm0 \n\t"
00496 "punpcklwd %%mm2, %%mm1 \n\t"
00497 "movq %%mm0, %%mm5 \n\t"
00498 "paddsw %%mm1, %%mm0 \n\t"
00499 "psubsw %%mm1, %%mm5 \n\t"
00500 "movq %%mm0, %%mm2 \n\t"
00501 "punpckldq %%mm5, %%mm0 \n\t"
00502 "punpckhdq %%mm5, %%mm2 \n\t"
00503 "movq 0(%1), %%mm1 \n\t"
00504 "movq 8(%1), %%mm3 \n\t"
00505 "movq 16(%1), %%mm4 \n\t"
00506 "movq 24(%1), %%mm5 \n\t"
00507 "movq 32(%1), %%mm6 \n\t"
00508 "movq 40(%1), %%mm7 \n\t"
00509 "pmaddwd %%mm0, %%mm1 \n\t"
00510 "pmaddwd %%mm2, %%mm3 \n\t"
00511 "pmaddwd %%mm0, %%mm4 \n\t"
00512 "pmaddwd %%mm2, %%mm5 \n\t"
00513 "pmaddwd %%mm0, %%mm6 \n\t"
00514 "pmaddwd %%mm2, %%mm7 \n\t"
00515 "pmaddwd 48(%1), %%mm0 \n\t"
00516 "pmaddwd 56(%1), %%mm2 \n\t"
00517 "paddd %%mm1, %%mm3 \n\t"
00518 "paddd %%mm4, %%mm5 \n\t"
00519 "paddd %%mm6, %%mm7 \n\t"
00520 "paddd %%mm0, %%mm2 \n\t"
00521 "movq (%2), %%mm0 \n\t"
00522 "paddd %%mm0, %%mm3 \n\t"
00523 "paddd %%mm0, %%mm5 \n\t"
00524 "paddd %%mm0, %%mm7 \n\t"
00525 "paddd %%mm0, %%mm2 \n\t"
00526 "psrad $"S(SHIFT_FRW_ROW)", %%mm3 \n\t"
00527 "psrad $"S(SHIFT_FRW_ROW)", %%mm5 \n\t"
00528 "psrad $"S(SHIFT_FRW_ROW)", %%mm7 \n\t"
00529 "psrad $"S(SHIFT_FRW_ROW)", %%mm2 \n\t"
00530 "packssdw %%mm5, %%mm3 \n\t"
00531 "packssdw %%mm2, %%mm7 \n\t"
00532 "movq %%mm3, 0(%3) \n\t"
00533 "movq %%mm7, 8(%3) \n\t"
00534 :
00535 : "r" (in), "r" (table), "r" (fdct_r_row), "r" (out));
00536 }
00537
00538 void ff_fdct_mmx(int16_t *block)
00539 {
00540 DECLARE_ALIGNED(8, int64_t, align_tmp)[16];
00541 int16_t * block1= (int16_t*)align_tmp;
00542 const int16_t *table= tab_frw_01234567;
00543 int i;
00544
00545 fdct_col_mmx(block, block1, 0);
00546 fdct_col_mmx(block, block1, 4);
00547
00548 for(i=8;i>0;i--) {
00549 fdct_row_mmx(block1, block, table);
00550 block1 += 8;
00551 table += 32;
00552 block += 8;
00553 }
00554 }
00555
00556 void ff_fdct_mmx2(int16_t *block)
00557 {
00558 DECLARE_ALIGNED(8, int64_t, align_tmp)[16];
00559 int16_t *block1= (int16_t*)align_tmp;
00560 const int16_t *table= tab_frw_01234567;
00561 int i;
00562
00563 fdct_col_mmx(block, block1, 0);
00564 fdct_col_mmx(block, block1, 4);
00565
00566 for(i=8;i>0;i--) {
00567 fdct_row_mmx2(block1, block, table);
00568 block1 += 8;
00569 table += 32;
00570 block += 8;
00571 }
00572 }
00573
00574 void ff_fdct_sse2(int16_t *block)
00575 {
00576 DECLARE_ALIGNED(16, int64_t, align_tmp)[16];
00577 int16_t * const block1= (int16_t*)align_tmp;
00578
00579 fdct_col_sse2(block, block1, 0);
00580 fdct_row_sse2(block1, block);
00581 }
00582