00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #include "libavutil/common.h"
00034 #include "libavcodec/dsputil.h"
00035
/* Attach a minimum alignment of `align` bytes to a declaration (GCC attribute syntax). */
#define ATTR_ALIGN(align) __attribute__ ((__aligned__ (align)))

/*
 * Fixed-point scaling parameters of the forward DCT.
 *
 * SHIFT_FRW_COL is the left shift applied to the samples in the column pass,
 * SHIFT_FRW_ROW the right shift applied to the 32-bit sums in the row pass,
 * and RND_FRW_ROW the rounding term (half of 1 << SHIFT_FRW_ROW) added before
 * that right shift.
 *
 * SHIFT_FRW_COL and SHIFT_FRW_ROW are pasted as text into the inline-assembly
 * templates of this file (through S()), so they have to stay object-like
 * macros whose expansion the assembler can evaluate; do not turn them into
 * enum constants or variables.
 */
#define BITS_FRW_ACC   3 //; 2 or 3 for accuracy
#define SHIFT_FRW_COL  BITS_FRW_ACC
#define SHIFT_FRW_ROW  (BITS_FRW_ACC + 17 - 3)
#define RND_FRW_ROW    (1 << (SHIFT_FRW_ROW-1))

/* Repeat one initializer eight times, i.e. fill one 8 x 16-bit vector with x. */
#define X8(x) x,x,x,x,x,x,x,x

/*
 * Column-pass multipliers for pmulhw, three vectors of eight identical words
 * (byte offsets 0, 16 and 32). Numerically:
 *   13036 / 65536 ~= tan(1*pi/16)
 *   27146 / 65536 ~= tan(2*pi/16)
 *  -21746 / 65536 ~= tan(3*pi/16) - 1
 */
static const int16_t fdct_tg_all_16[24] ATTR_ALIGN(16) = {
    X8(13036),
    X8(27146),
    X8(-21746)
};

/* Column-pass multiplier, 23170 / 32768 ~= cos(pi/4), replicated in every lane. */
static const int16_t ocos_4_16[8] ATTR_ALIGN(16) = {
    X8(23170)
};

/* A vector of ones; the column pass ORs it into several results (sets bit 0). */
static const int16_t fdct_one_corr[8] ATTR_ALIGN(16) = { X8(1) };

/* Rounding term of the row pass for the 64-bit (MMX) code: two 32-bit lanes. */
static const int32_t fdct_r_row[2] ATTR_ALIGN(8) = {RND_FRW_ROW, RND_FRW_ROW };

/*
 * Rounding term of the row pass for the 128-bit (SSE2) code: four 32-bit lanes.
 * NOTE(review): the array is wrapped in a struct, presumably so that the
 * 16-byte alignment is honoured by every supported compiler -- confirm before
 * flattening it into a plain array.
 */
static struct
{
    const int32_t fdct_r_row_sse2[4] ATTR_ALIGN(16);
} fdct_r_row_sse2 ATTR_ALIGN(16)=
{{
    RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW
}};
00079
00080
00081 static const int16_t tab_frw_01234567[] ATTR_ALIGN(8) = {
00082 16384, 16384, 22725, 19266,
00083 16384, 16384, 12873, 4520,
00084 21407, 8867, 19266, -4520,
00085 -8867, -21407, -22725, -12873,
00086 16384, -16384, 12873, -22725,
00087 -16384, 16384, 4520, 19266,
00088 8867, -21407, 4520, -12873,
00089 21407, -8867, 19266, -22725,
00090
00091 22725, 22725, 31521, 26722,
00092 22725, 22725, 17855, 6270,
00093 29692, 12299, 26722, -6270,
00094 -12299, -29692, -31521, -17855,
00095 22725, -22725, 17855, -31521,
00096 -22725, 22725, 6270, 26722,
00097 12299, -29692, 6270, -17855,
00098 29692, -12299, 26722, -31521,
00099
00100 21407, 21407, 29692, 25172,
00101 21407, 21407, 16819, 5906,
00102 27969, 11585, 25172, -5906,
00103 -11585, -27969, -29692, -16819,
00104 21407, -21407, 16819, -29692,
00105 -21407, 21407, 5906, 25172,
00106 11585, -27969, 5906, -16819,
00107 27969, -11585, 25172, -29692,
00108
00109 19266, 19266, 26722, 22654,
00110 19266, 19266, 15137, 5315,
00111 25172, 10426, 22654, -5315,
00112 -10426, -25172, -26722, -15137,
00113 19266, -19266, 15137, -26722,
00114 -19266, 19266, 5315, 22654,
00115 10426, -25172, 5315, -15137,
00116 25172, -10426, 22654, -26722,
00117
00118 16384, 16384, 22725, 19266,
00119 16384, 16384, 12873, 4520,
00120 21407, 8867, 19266, -4520,
00121 -8867, -21407, -22725, -12873,
00122 16384, -16384, 12873, -22725,
00123 -16384, 16384, 4520, 19266,
00124 8867, -21407, 4520, -12873,
00125 21407, -8867, 19266, -22725,
00126
00127 19266, 19266, 26722, 22654,
00128 19266, 19266, 15137, 5315,
00129 25172, 10426, 22654, -5315,
00130 -10426, -25172, -26722, -15137,
00131 19266, -19266, 15137, -26722,
00132 -19266, 19266, 5315, 22654,
00133 10426, -25172, 5315, -15137,
00134 25172, -10426, 22654, -26722,
00135
00136 21407, 21407, 29692, 25172,
00137 21407, 21407, 16819, 5906,
00138 27969, 11585, 25172, -5906,
00139 -11585, -27969, -29692, -16819,
00140 21407, -21407, 16819, -29692,
00141 -21407, 21407, 5906, 25172,
00142 11585, -27969, 5906, -16819,
00143 27969, -11585, 25172, -29692,
00144
00145 22725, 22725, 31521, 26722,
00146 22725, 22725, 17855, 6270,
00147 29692, 12299, 26722, -6270,
00148 -12299, -29692, -31521, -17855,
00149 22725, -22725, 17855, -31521,
00150 -22725, 22725, 6270, 26722,
00151 12299, -29692, 6270, -17855,
00152 29692, -12299, 26722, -31521,
00153 };
00154
00155 static struct
00156 {
00157 const int16_t tab_frw_01234567_sse2[256] ATTR_ALIGN(16);
00158 } tab_frw_01234567_sse2 ATTR_ALIGN(16) =
00159 {{
00160
00161 #define TABLE_SSE2 C4, C4, C1, C3, -C6, -C2, -C1, -C5, \
00162 C4, C4, C5, C7, C2, C6, C3, -C7, \
00163 -C4, C4, C7, C3, C6, -C2, C7, -C5, \
00164 C4, -C4, C5, -C1, C2, -C6, C3, -C1,
00165
00166 #define C1 22725
00167 #define C2 21407
00168 #define C3 19266
00169 #define C4 16384
00170 #define C5 12873
00171 #define C6 8867
00172 #define C7 4520
00173 TABLE_SSE2
00174
00175 #undef C1
00176 #undef C2
00177 #undef C3
00178 #undef C4
00179 #undef C5
00180 #undef C6
00181 #undef C7
00182 #define C1 31521
00183 #define C2 29692
00184 #define C3 26722
00185 #define C4 22725
00186 #define C5 17855
00187 #define C6 12299
00188 #define C7 6270
00189 TABLE_SSE2
00190
00191 #undef C1
00192 #undef C2
00193 #undef C3
00194 #undef C4
00195 #undef C5
00196 #undef C6
00197 #undef C7
00198 #define C1 29692
00199 #define C2 27969
00200 #define C3 25172
00201 #define C4 21407
00202 #define C5 16819
00203 #define C6 11585
00204 #define C7 5906
00205 TABLE_SSE2
00206
00207 #undef C1
00208 #undef C2
00209 #undef C3
00210 #undef C4
00211 #undef C5
00212 #undef C6
00213 #undef C7
00214 #define C1 26722
00215 #define C2 25172
00216 #define C3 22654
00217 #define C4 19266
00218 #define C5 15137
00219 #define C6 10426
00220 #define C7 5315
00221 TABLE_SSE2
00222
00223 #undef C1
00224 #undef C2
00225 #undef C3
00226 #undef C4
00227 #undef C5
00228 #undef C6
00229 #undef C7
00230 #define C1 22725
00231 #define C2 21407
00232 #define C3 19266
00233 #define C4 16384
00234 #define C5 12873
00235 #define C6 8867
00236 #define C7 4520
00237 TABLE_SSE2
00238
00239 #undef C1
00240 #undef C2
00241 #undef C3
00242 #undef C4
00243 #undef C5
00244 #undef C6
00245 #undef C7
00246 #define C1 26722
00247 #define C2 25172
00248 #define C3 22654
00249 #define C4 19266
00250 #define C5 15137
00251 #define C6 10426
00252 #define C7 5315
00253 TABLE_SSE2
00254
00255 #undef C1
00256 #undef C2
00257 #undef C3
00258 #undef C4
00259 #undef C5
00260 #undef C6
00261 #undef C7
00262 #define C1 29692
00263 #define C2 27969
00264 #define C3 25172
00265 #define C4 21407
00266 #define C5 16819
00267 #define C6 11585
00268 #define C7 5906
00269 TABLE_SSE2
00270
00271 #undef C1
00272 #undef C2
00273 #undef C3
00274 #undef C4
00275 #undef C5
00276 #undef C6
00277 #undef C7
00278 #define C1 31521
00279 #define C2 29692
00280 #define C3 26722
00281 #define C4 22725
00282 #define C5 17855
00283 #define C6 12299
00284 #define C7 6270
00285 TABLE_SSE2
00286 }};
00287
/*
 * Short alias for AV_TOSTRING(): turns the (macro-expanded) argument into a
 * string literal so that numeric macros such as SHIFT_FRW_COL can be spliced
 * into the inline-assembly templates below.
 */
#define S(s) AV_TOSTRING(s) //AV_STRINGIFY is too long
00289
00290 #define FDCT_COL(cpu, mm, mov)\
00291 static av_always_inline void fdct_col_##cpu(const int16_t *in, int16_t *out, int offset)\
00292 {\
00293 __asm__ volatile (\
00294 #mov" 16(%0), %%"#mm"0 \n\t" \
00295 #mov" 96(%0), %%"#mm"1 \n\t" \
00296 #mov" %%"#mm"0, %%"#mm"2 \n\t" \
00297 #mov" 32(%0), %%"#mm"3 \n\t" \
00298 "paddsw %%"#mm"1, %%"#mm"0 \n\t" \
00299 #mov" 80(%0), %%"#mm"4 \n\t" \
00300 "psllw $"S(SHIFT_FRW_COL)", %%"#mm"0 \n\t" \
00301 #mov" (%0), %%"#mm"5 \n\t" \
00302 "paddsw %%"#mm"3, %%"#mm"4 \n\t" \
00303 "paddsw 112(%0), %%"#mm"5 \n\t" \
00304 "psllw $"S(SHIFT_FRW_COL)", %%"#mm"4 \n\t" \
00305 #mov" %%"#mm"0, %%"#mm"6 \n\t" \
00306 "psubsw %%"#mm"1, %%"#mm"2 \n\t" \
00307 #mov" 16(%1), %%"#mm"1 \n\t" \
00308 "psubsw %%"#mm"4, %%"#mm"0 \n\t" \
00309 #mov" 48(%0), %%"#mm"7 \n\t" \
00310 "pmulhw %%"#mm"0, %%"#mm"1 \n\t" \
00311 "paddsw 64(%0), %%"#mm"7 \n\t" \
00312 "psllw $"S(SHIFT_FRW_COL)", %%"#mm"5 \n\t" \
00313 "paddsw %%"#mm"4, %%"#mm"6 \n\t" \
00314 "psllw $"S(SHIFT_FRW_COL)", %%"#mm"7 \n\t" \
00315 #mov" %%"#mm"5, %%"#mm"4 \n\t" \
00316 "psubsw %%"#mm"7, %%"#mm"5 \n\t" \
00317 "paddsw %%"#mm"5, %%"#mm"1 \n\t" \
00318 "paddsw %%"#mm"7, %%"#mm"4 \n\t" \
00319 "por (%2), %%"#mm"1 \n\t" \
00320 "psllw $"S(SHIFT_FRW_COL)"+1, %%"#mm"2 \n\t" \
00321 "pmulhw 16(%1), %%"#mm"5 \n\t" \
00322 #mov" %%"#mm"4, %%"#mm"7 \n\t" \
00323 "psubsw 80(%0), %%"#mm"3 \n\t" \
00324 "psubsw %%"#mm"6, %%"#mm"4 \n\t" \
00325 #mov" %%"#mm"1, 32(%3) \n\t" \
00326 "paddsw %%"#mm"6, %%"#mm"7 \n\t" \
00327 #mov" 48(%0), %%"#mm"1 \n\t" \
00328 "psllw $"S(SHIFT_FRW_COL)"+1, %%"#mm"3 \n\t" \
00329 "psubsw 64(%0), %%"#mm"1 \n\t" \
00330 #mov" %%"#mm"2, %%"#mm"6 \n\t" \
00331 #mov" %%"#mm"4, 64(%3) \n\t" \
00332 "paddsw %%"#mm"3, %%"#mm"2 \n\t" \
00333 "pmulhw (%4), %%"#mm"2 \n\t" \
00334 "psubsw %%"#mm"3, %%"#mm"6 \n\t" \
00335 "pmulhw (%4), %%"#mm"6 \n\t" \
00336 "psubsw %%"#mm"0, %%"#mm"5 \n\t" \
00337 "por (%2), %%"#mm"5 \n\t" \
00338 "psllw $"S(SHIFT_FRW_COL)", %%"#mm"1 \n\t" \
00339 "por (%2), %%"#mm"2 \n\t" \
00340 #mov" %%"#mm"1, %%"#mm"4 \n\t" \
00341 #mov" (%0), %%"#mm"3 \n\t" \
00342 "paddsw %%"#mm"6, %%"#mm"1 \n\t" \
00343 "psubsw 112(%0), %%"#mm"3 \n\t" \
00344 "psubsw %%"#mm"6, %%"#mm"4 \n\t" \
00345 #mov" (%1), %%"#mm"0 \n\t" \
00346 "psllw $"S(SHIFT_FRW_COL)", %%"#mm"3 \n\t" \
00347 #mov" 32(%1), %%"#mm"6 \n\t" \
00348 "pmulhw %%"#mm"1, %%"#mm"0 \n\t" \
00349 #mov" %%"#mm"7, (%3) \n\t" \
00350 "pmulhw %%"#mm"4, %%"#mm"6 \n\t" \
00351 #mov" %%"#mm"5, 96(%3) \n\t" \
00352 #mov" %%"#mm"3, %%"#mm"7 \n\t" \
00353 #mov" 32(%1), %%"#mm"5 \n\t" \
00354 "psubsw %%"#mm"2, %%"#mm"7 \n\t" \
00355 "paddsw %%"#mm"2, %%"#mm"3 \n\t" \
00356 "pmulhw %%"#mm"7, %%"#mm"5 \n\t" \
00357 "paddsw %%"#mm"3, %%"#mm"0 \n\t" \
00358 "paddsw %%"#mm"4, %%"#mm"6 \n\t" \
00359 "pmulhw (%1), %%"#mm"3 \n\t" \
00360 "por (%2), %%"#mm"0 \n\t" \
00361 "paddsw %%"#mm"7, %%"#mm"5 \n\t" \
00362 "psubsw %%"#mm"6, %%"#mm"7 \n\t" \
00363 #mov" %%"#mm"0, 16(%3) \n\t" \
00364 "paddsw %%"#mm"4, %%"#mm"5 \n\t" \
00365 #mov" %%"#mm"7, 48(%3) \n\t" \
00366 "psubsw %%"#mm"1, %%"#mm"3 \n\t" \
00367 #mov" %%"#mm"5, 80(%3) \n\t" \
00368 #mov" %%"#mm"3, 112(%3) \n\t" \
00369 : \
00370 : "r" (in + offset), "r" (fdct_tg_all_16), "r" (fdct_one_corr), \
00371 "r" (out + offset), "r" (ocos_4_16)); \
00372 }
00373
00374 FDCT_COL(mmx, mm, movq)
00375 FDCT_COL(sse2, xmm, movdqa)
00376
00377 static av_always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
00378 {
00379 __asm__ volatile(
00380 #define FDCT_ROW_SSE2_H1(i,t) \
00381 "movq " #i "(%0), %%xmm2 \n\t" \
00382 "movq " #i "+8(%0), %%xmm0 \n\t" \
00383 "movdqa " #t "+32(%1), %%xmm3 \n\t" \
00384 "movdqa " #t "+48(%1), %%xmm7 \n\t" \
00385 "movdqa " #t "(%1), %%xmm4 \n\t" \
00386 "movdqa " #t "+16(%1), %%xmm5 \n\t"
00387
00388 #define FDCT_ROW_SSE2_H2(i,t) \
00389 "movq " #i "(%0), %%xmm2 \n\t" \
00390 "movq " #i "+8(%0), %%xmm0 \n\t" \
00391 "movdqa " #t "+32(%1), %%xmm3 \n\t" \
00392 "movdqa " #t "+48(%1), %%xmm7 \n\t"
00393
00394 #define FDCT_ROW_SSE2(i) \
00395 "movq %%xmm2, %%xmm1 \n\t" \
00396 "pshuflw $27, %%xmm0, %%xmm0 \n\t" \
00397 "paddsw %%xmm0, %%xmm1 \n\t" \
00398 "psubsw %%xmm0, %%xmm2 \n\t" \
00399 "punpckldq %%xmm2, %%xmm1 \n\t" \
00400 "pshufd $78, %%xmm1, %%xmm2 \n\t" \
00401 "pmaddwd %%xmm2, %%xmm3 \n\t" \
00402 "pmaddwd %%xmm1, %%xmm7 \n\t" \
00403 "pmaddwd %%xmm5, %%xmm2 \n\t" \
00404 "pmaddwd %%xmm4, %%xmm1 \n\t" \
00405 "paddd %%xmm7, %%xmm3 \n\t" \
00406 "paddd %%xmm2, %%xmm1 \n\t" \
00407 "paddd %%xmm6, %%xmm3 \n\t" \
00408 "paddd %%xmm6, %%xmm1 \n\t" \
00409 "psrad %3, %%xmm3 \n\t" \
00410 "psrad %3, %%xmm1 \n\t" \
00411 "packssdw %%xmm3, %%xmm1 \n\t" \
00412 "movdqa %%xmm1, " #i "(%4) \n\t"
00413
00414 "movdqa (%2), %%xmm6 \n\t"
00415 FDCT_ROW_SSE2_H1(0,0)
00416 FDCT_ROW_SSE2(0)
00417 FDCT_ROW_SSE2_H2(64,0)
00418 FDCT_ROW_SSE2(64)
00419
00420 FDCT_ROW_SSE2_H1(16,64)
00421 FDCT_ROW_SSE2(16)
00422 FDCT_ROW_SSE2_H2(112,64)
00423 FDCT_ROW_SSE2(112)
00424
00425 FDCT_ROW_SSE2_H1(32,128)
00426 FDCT_ROW_SSE2(32)
00427 FDCT_ROW_SSE2_H2(96,128)
00428 FDCT_ROW_SSE2(96)
00429
00430 FDCT_ROW_SSE2_H1(48,192)
00431 FDCT_ROW_SSE2(48)
00432 FDCT_ROW_SSE2_H2(80,192)
00433 FDCT_ROW_SSE2(80)
00434 :
00435 : "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out)
00436 );
00437 }
00438
00439 static av_always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table)
00440 {
00441 __asm__ volatile (
00442 "pshufw $0x1B, 8(%0), %%mm5 \n\t"
00443 "movq (%0), %%mm0 \n\t"
00444 "movq %%mm0, %%mm1 \n\t"
00445 "paddsw %%mm5, %%mm0 \n\t"
00446 "psubsw %%mm5, %%mm1 \n\t"
00447 "movq %%mm0, %%mm2 \n\t"
00448 "punpckldq %%mm1, %%mm0 \n\t"
00449 "punpckhdq %%mm1, %%mm2 \n\t"
00450 "movq (%1), %%mm1 \n\t"
00451 "movq 8(%1), %%mm3 \n\t"
00452 "movq 16(%1), %%mm4 \n\t"
00453 "movq 24(%1), %%mm5 \n\t"
00454 "movq 32(%1), %%mm6 \n\t"
00455 "movq 40(%1), %%mm7 \n\t"
00456 "pmaddwd %%mm0, %%mm1 \n\t"
00457 "pmaddwd %%mm2, %%mm3 \n\t"
00458 "pmaddwd %%mm0, %%mm4 \n\t"
00459 "pmaddwd %%mm2, %%mm5 \n\t"
00460 "pmaddwd %%mm0, %%mm6 \n\t"
00461 "pmaddwd %%mm2, %%mm7 \n\t"
00462 "pmaddwd 48(%1), %%mm0 \n\t"
00463 "pmaddwd 56(%1), %%mm2 \n\t"
00464 "paddd %%mm1, %%mm3 \n\t"
00465 "paddd %%mm4, %%mm5 \n\t"
00466 "paddd %%mm6, %%mm7 \n\t"
00467 "paddd %%mm0, %%mm2 \n\t"
00468 "movq (%2), %%mm0 \n\t"
00469 "paddd %%mm0, %%mm3 \n\t"
00470 "paddd %%mm0, %%mm5 \n\t"
00471 "paddd %%mm0, %%mm7 \n\t"
00472 "paddd %%mm0, %%mm2 \n\t"
00473 "psrad $"S(SHIFT_FRW_ROW)", %%mm3 \n\t"
00474 "psrad $"S(SHIFT_FRW_ROW)", %%mm5 \n\t"
00475 "psrad $"S(SHIFT_FRW_ROW)", %%mm7 \n\t"
00476 "psrad $"S(SHIFT_FRW_ROW)", %%mm2 \n\t"
00477 "packssdw %%mm5, %%mm3 \n\t"
00478 "packssdw %%mm2, %%mm7 \n\t"
00479 "movq %%mm3, (%3) \n\t"
00480 "movq %%mm7, 8(%3) \n\t"
00481 :
00482 : "r" (in), "r" (table), "r" (fdct_r_row), "r" (out));
00483 }
00484
00485 static av_always_inline void fdct_row_mmx(const int16_t *in, int16_t *out, const int16_t *table)
00486 {
00487
00488 __asm__ volatile(
00489 "movd 12(%0), %%mm1 \n\t"
00490 "punpcklwd 8(%0), %%mm1 \n\t"
00491 "movq %%mm1, %%mm2 \n\t"
00492 "psrlq $0x20, %%mm1 \n\t"
00493 "movq 0(%0), %%mm0 \n\t"
00494 "punpcklwd %%mm2, %%mm1 \n\t"
00495 "movq %%mm0, %%mm5 \n\t"
00496 "paddsw %%mm1, %%mm0 \n\t"
00497 "psubsw %%mm1, %%mm5 \n\t"
00498 "movq %%mm0, %%mm2 \n\t"
00499 "punpckldq %%mm5, %%mm0 \n\t"
00500 "punpckhdq %%mm5, %%mm2 \n\t"
00501 "movq 0(%1), %%mm1 \n\t"
00502 "movq 8(%1), %%mm3 \n\t"
00503 "movq 16(%1), %%mm4 \n\t"
00504 "movq 24(%1), %%mm5 \n\t"
00505 "movq 32(%1), %%mm6 \n\t"
00506 "movq 40(%1), %%mm7 \n\t"
00507 "pmaddwd %%mm0, %%mm1 \n\t"
00508 "pmaddwd %%mm2, %%mm3 \n\t"
00509 "pmaddwd %%mm0, %%mm4 \n\t"
00510 "pmaddwd %%mm2, %%mm5 \n\t"
00511 "pmaddwd %%mm0, %%mm6 \n\t"
00512 "pmaddwd %%mm2, %%mm7 \n\t"
00513 "pmaddwd 48(%1), %%mm0 \n\t"
00514 "pmaddwd 56(%1), %%mm2 \n\t"
00515 "paddd %%mm1, %%mm3 \n\t"
00516 "paddd %%mm4, %%mm5 \n\t"
00517 "paddd %%mm6, %%mm7 \n\t"
00518 "paddd %%mm0, %%mm2 \n\t"
00519 "movq (%2), %%mm0 \n\t"
00520 "paddd %%mm0, %%mm3 \n\t"
00521 "paddd %%mm0, %%mm5 \n\t"
00522 "paddd %%mm0, %%mm7 \n\t"
00523 "paddd %%mm0, %%mm2 \n\t"
00524 "psrad $"S(SHIFT_FRW_ROW)", %%mm3 \n\t"
00525 "psrad $"S(SHIFT_FRW_ROW)", %%mm5 \n\t"
00526 "psrad $"S(SHIFT_FRW_ROW)", %%mm7 \n\t"
00527 "psrad $"S(SHIFT_FRW_ROW)", %%mm2 \n\t"
00528 "packssdw %%mm5, %%mm3 \n\t"
00529 "packssdw %%mm2, %%mm7 \n\t"
00530 "movq %%mm3, 0(%3) \n\t"
00531 "movq %%mm7, 8(%3) \n\t"
00532 :
00533 : "r" (in), "r" (table), "r" (fdct_r_row), "r" (out));
00534 }
00535
00536 void ff_fdct_mmx(int16_t *block)
00537 {
00538 int64_t align_tmp[16] ATTR_ALIGN(8);
00539 int16_t * block1= (int16_t*)align_tmp;
00540 const int16_t *table= tab_frw_01234567;
00541 int i;
00542
00543 fdct_col_mmx(block, block1, 0);
00544 fdct_col_mmx(block, block1, 4);
00545
00546 for(i=8;i>0;i--) {
00547 fdct_row_mmx(block1, block, table);
00548 block1 += 8;
00549 table += 32;
00550 block += 8;
00551 }
00552 }
00553
00554 void ff_fdct_mmx2(int16_t *block)
00555 {
00556 int64_t align_tmp[16] ATTR_ALIGN(8);
00557 int16_t *block1= (int16_t*)align_tmp;
00558 const int16_t *table= tab_frw_01234567;
00559 int i;
00560
00561 fdct_col_mmx(block, block1, 0);
00562 fdct_col_mmx(block, block1, 4);
00563
00564 for(i=8;i>0;i--) {
00565 fdct_row_mmx2(block1, block, table);
00566 block1 += 8;
00567 table += 32;
00568 block += 8;
00569 }
00570 }
00571
00572 void ff_fdct_sse2(int16_t *block)
00573 {
00574 int64_t align_tmp[16] ATTR_ALIGN(16);
00575 int16_t * const block1= (int16_t*)align_tmp;
00576
00577 fdct_col_sse2(block, block1, 0);
00578 fdct_row_sse2(block1, block);
00579 }
00580