00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include "libavutil/cpu.h"
00023 #include "libavutil/x86/asm.h"
00024 #include "libavcodec/avcodec.h"
00025 #include "libavcodec/dsputil.h"
00026 #include "libavcodec/mpegvideo.h"
00027 #include "dsputil_mmx.h"
00028
00029 #if HAVE_INLINE_ASM
00030
00031 static void dct_unquantize_h263_intra_mmx(MpegEncContext *s,
00032 DCTELEM *block, int n, int qscale)
00033 {
00034 x86_reg level, qmul, qadd, nCoeffs;
00035
00036 qmul = qscale << 1;
00037
00038 av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
00039
00040 if (!s->h263_aic) {
00041 if (n < 4)
00042 level = block[0] * s->y_dc_scale;
00043 else
00044 level = block[0] * s->c_dc_scale;
00045 qadd = (qscale - 1) | 1;
00046 }else{
00047 qadd = 0;
00048 level= block[0];
00049 }
00050 if(s->ac_pred)
00051 nCoeffs=63;
00052 else
00053 nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
00054
00055 __asm__ volatile(
00056 "movd %1, %%mm6 \n\t"
00057 "packssdw %%mm6, %%mm6 \n\t"
00058 "packssdw %%mm6, %%mm6 \n\t"
00059 "movd %2, %%mm5 \n\t"
00060 "pxor %%mm7, %%mm7 \n\t"
00061 "packssdw %%mm5, %%mm5 \n\t"
00062 "packssdw %%mm5, %%mm5 \n\t"
00063 "psubw %%mm5, %%mm7 \n\t"
00064 "pxor %%mm4, %%mm4 \n\t"
00065 ".p2align 4 \n\t"
00066 "1: \n\t"
00067 "movq (%0, %3), %%mm0 \n\t"
00068 "movq 8(%0, %3), %%mm1 \n\t"
00069
00070 "pmullw %%mm6, %%mm0 \n\t"
00071 "pmullw %%mm6, %%mm1 \n\t"
00072
00073 "movq (%0, %3), %%mm2 \n\t"
00074 "movq 8(%0, %3), %%mm3 \n\t"
00075
00076 "pcmpgtw %%mm4, %%mm2 \n\t"
00077 "pcmpgtw %%mm4, %%mm3 \n\t"
00078
00079 "pxor %%mm2, %%mm0 \n\t"
00080 "pxor %%mm3, %%mm1 \n\t"
00081
00082 "paddw %%mm7, %%mm0 \n\t"
00083 "paddw %%mm7, %%mm1 \n\t"
00084
00085 "pxor %%mm0, %%mm2 \n\t"
00086 "pxor %%mm1, %%mm3 \n\t"
00087
00088 "pcmpeqw %%mm7, %%mm0 \n\t"
00089 "pcmpeqw %%mm7, %%mm1 \n\t"
00090
00091 "pandn %%mm2, %%mm0 \n\t"
00092 "pandn %%mm3, %%mm1 \n\t"
00093
00094 "movq %%mm0, (%0, %3) \n\t"
00095 "movq %%mm1, 8(%0, %3) \n\t"
00096
00097 "add $16, %3 \n\t"
00098 "jng 1b \n\t"
00099 ::"r" (block+nCoeffs), "rm"(qmul), "rm" (qadd), "r" (2*(-nCoeffs))
00100 : "memory"
00101 );
00102 block[0]= level;
00103 }
00104
00105
00106 static void dct_unquantize_h263_inter_mmx(MpegEncContext *s,
00107 DCTELEM *block, int n, int qscale)
00108 {
00109 x86_reg qmul, qadd, nCoeffs;
00110
00111 qmul = qscale << 1;
00112 qadd = (qscale - 1) | 1;
00113
00114 assert(s->block_last_index[n]>=0 || s->h263_aic);
00115
00116 nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
00117
00118 __asm__ volatile(
00119 "movd %1, %%mm6 \n\t"
00120 "packssdw %%mm6, %%mm6 \n\t"
00121 "packssdw %%mm6, %%mm6 \n\t"
00122 "movd %2, %%mm5 \n\t"
00123 "pxor %%mm7, %%mm7 \n\t"
00124 "packssdw %%mm5, %%mm5 \n\t"
00125 "packssdw %%mm5, %%mm5 \n\t"
00126 "psubw %%mm5, %%mm7 \n\t"
00127 "pxor %%mm4, %%mm4 \n\t"
00128 ".p2align 4 \n\t"
00129 "1: \n\t"
00130 "movq (%0, %3), %%mm0 \n\t"
00131 "movq 8(%0, %3), %%mm1 \n\t"
00132
00133 "pmullw %%mm6, %%mm0 \n\t"
00134 "pmullw %%mm6, %%mm1 \n\t"
00135
00136 "movq (%0, %3), %%mm2 \n\t"
00137 "movq 8(%0, %3), %%mm3 \n\t"
00138
00139 "pcmpgtw %%mm4, %%mm2 \n\t"
00140 "pcmpgtw %%mm4, %%mm3 \n\t"
00141
00142 "pxor %%mm2, %%mm0 \n\t"
00143 "pxor %%mm3, %%mm1 \n\t"
00144
00145 "paddw %%mm7, %%mm0 \n\t"
00146 "paddw %%mm7, %%mm1 \n\t"
00147
00148 "pxor %%mm0, %%mm2 \n\t"
00149 "pxor %%mm1, %%mm3 \n\t"
00150
00151 "pcmpeqw %%mm7, %%mm0 \n\t"
00152 "pcmpeqw %%mm7, %%mm1 \n\t"
00153
00154 "pandn %%mm2, %%mm0 \n\t"
00155 "pandn %%mm3, %%mm1 \n\t"
00156
00157 "movq %%mm0, (%0, %3) \n\t"
00158 "movq %%mm1, 8(%0, %3) \n\t"
00159
00160 "add $16, %3 \n\t"
00161 "jng 1b \n\t"
00162 ::"r" (block+nCoeffs), "rm"(qmul), "rm" (qadd), "r" (2*(-nCoeffs))
00163 : "memory"
00164 );
00165 }
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189 static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s,
00190 DCTELEM *block, int n, int qscale)
00191 {
00192 x86_reg nCoeffs;
00193 const uint16_t *quant_matrix;
00194 int block0;
00195
00196 av_assert2(s->block_last_index[n]>=0);
00197
00198 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
00199
00200 if (n < 4)
00201 block0 = block[0] * s->y_dc_scale;
00202 else
00203 block0 = block[0] * s->c_dc_scale;
00204
00205 quant_matrix = s->intra_matrix;
00206 __asm__ volatile(
00207 "pcmpeqw %%mm7, %%mm7 \n\t"
00208 "psrlw $15, %%mm7 \n\t"
00209 "movd %2, %%mm6 \n\t"
00210 "packssdw %%mm6, %%mm6 \n\t"
00211 "packssdw %%mm6, %%mm6 \n\t"
00212 "mov %3, %%"REG_a" \n\t"
00213 ".p2align 4 \n\t"
00214 "1: \n\t"
00215 "movq (%0, %%"REG_a"), %%mm0 \n\t"
00216 "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
00217 "movq (%1, %%"REG_a"), %%mm4 \n\t"
00218 "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
00219 "pmullw %%mm6, %%mm4 \n\t"
00220 "pmullw %%mm6, %%mm5 \n\t"
00221 "pxor %%mm2, %%mm2 \n\t"
00222 "pxor %%mm3, %%mm3 \n\t"
00223 "pcmpgtw %%mm0, %%mm2 \n\t"
00224 "pcmpgtw %%mm1, %%mm3 \n\t"
00225 "pxor %%mm2, %%mm0 \n\t"
00226 "pxor %%mm3, %%mm1 \n\t"
00227 "psubw %%mm2, %%mm0 \n\t"
00228 "psubw %%mm3, %%mm1 \n\t"
00229 "pmullw %%mm4, %%mm0 \n\t"
00230 "pmullw %%mm5, %%mm1 \n\t"
00231 "pxor %%mm4, %%mm4 \n\t"
00232 "pxor %%mm5, %%mm5 \n\t"
00233 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t"
00234 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t"
00235 "psraw $3, %%mm0 \n\t"
00236 "psraw $3, %%mm1 \n\t"
00237 "psubw %%mm7, %%mm0 \n\t"
00238 "psubw %%mm7, %%mm1 \n\t"
00239 "por %%mm7, %%mm0 \n\t"
00240 "por %%mm7, %%mm1 \n\t"
00241 "pxor %%mm2, %%mm0 \n\t"
00242 "pxor %%mm3, %%mm1 \n\t"
00243 "psubw %%mm2, %%mm0 \n\t"
00244 "psubw %%mm3, %%mm1 \n\t"
00245 "pandn %%mm0, %%mm4 \n\t"
00246 "pandn %%mm1, %%mm5 \n\t"
00247 "movq %%mm4, (%0, %%"REG_a") \n\t"
00248 "movq %%mm5, 8(%0, %%"REG_a") \n\t"
00249
00250 "add $16, %%"REG_a" \n\t"
00251 "js 1b \n\t"
00252 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs)
00253 : "%"REG_a, "memory"
00254 );
00255 block[0]= block0;
00256 }
00257
00258 static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s,
00259 DCTELEM *block, int n, int qscale)
00260 {
00261 x86_reg nCoeffs;
00262 const uint16_t *quant_matrix;
00263
00264 av_assert2(s->block_last_index[n]>=0);
00265
00266 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
00267
00268 quant_matrix = s->inter_matrix;
00269 __asm__ volatile(
00270 "pcmpeqw %%mm7, %%mm7 \n\t"
00271 "psrlw $15, %%mm7 \n\t"
00272 "movd %2, %%mm6 \n\t"
00273 "packssdw %%mm6, %%mm6 \n\t"
00274 "packssdw %%mm6, %%mm6 \n\t"
00275 "mov %3, %%"REG_a" \n\t"
00276 ".p2align 4 \n\t"
00277 "1: \n\t"
00278 "movq (%0, %%"REG_a"), %%mm0 \n\t"
00279 "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
00280 "movq (%1, %%"REG_a"), %%mm4 \n\t"
00281 "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
00282 "pmullw %%mm6, %%mm4 \n\t"
00283 "pmullw %%mm6, %%mm5 \n\t"
00284 "pxor %%mm2, %%mm2 \n\t"
00285 "pxor %%mm3, %%mm3 \n\t"
00286 "pcmpgtw %%mm0, %%mm2 \n\t"
00287 "pcmpgtw %%mm1, %%mm3 \n\t"
00288 "pxor %%mm2, %%mm0 \n\t"
00289 "pxor %%mm3, %%mm1 \n\t"
00290 "psubw %%mm2, %%mm0 \n\t"
00291 "psubw %%mm3, %%mm1 \n\t"
00292 "paddw %%mm0, %%mm0 \n\t"
00293 "paddw %%mm1, %%mm1 \n\t"
00294 "paddw %%mm7, %%mm0 \n\t"
00295 "paddw %%mm7, %%mm1 \n\t"
00296 "pmullw %%mm4, %%mm0 \n\t"
00297 "pmullw %%mm5, %%mm1 \n\t"
00298 "pxor %%mm4, %%mm4 \n\t"
00299 "pxor %%mm5, %%mm5 \n\t"
00300 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t"
00301 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t"
00302 "psraw $4, %%mm0 \n\t"
00303 "psraw $4, %%mm1 \n\t"
00304 "psubw %%mm7, %%mm0 \n\t"
00305 "psubw %%mm7, %%mm1 \n\t"
00306 "por %%mm7, %%mm0 \n\t"
00307 "por %%mm7, %%mm1 \n\t"
00308 "pxor %%mm2, %%mm0 \n\t"
00309 "pxor %%mm3, %%mm1 \n\t"
00310 "psubw %%mm2, %%mm0 \n\t"
00311 "psubw %%mm3, %%mm1 \n\t"
00312 "pandn %%mm0, %%mm4 \n\t"
00313 "pandn %%mm1, %%mm5 \n\t"
00314 "movq %%mm4, (%0, %%"REG_a") \n\t"
00315 "movq %%mm5, 8(%0, %%"REG_a") \n\t"
00316
00317 "add $16, %%"REG_a" \n\t"
00318 "js 1b \n\t"
00319 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs)
00320 : "%"REG_a, "memory"
00321 );
00322 }
00323
00324 static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
00325 DCTELEM *block, int n, int qscale)
00326 {
00327 x86_reg nCoeffs;
00328 const uint16_t *quant_matrix;
00329 int block0;
00330
00331 av_assert2(s->block_last_index[n]>=0);
00332
00333 if(s->alternate_scan) nCoeffs= 63;
00334 else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
00335
00336 if (n < 4)
00337 block0 = block[0] * s->y_dc_scale;
00338 else
00339 block0 = block[0] * s->c_dc_scale;
00340 quant_matrix = s->intra_matrix;
00341 __asm__ volatile(
00342 "pcmpeqw %%mm7, %%mm7 \n\t"
00343 "psrlw $15, %%mm7 \n\t"
00344 "movd %2, %%mm6 \n\t"
00345 "packssdw %%mm6, %%mm6 \n\t"
00346 "packssdw %%mm6, %%mm6 \n\t"
00347 "mov %3, %%"REG_a" \n\t"
00348 ".p2align 4 \n\t"
00349 "1: \n\t"
00350 "movq (%0, %%"REG_a"), %%mm0 \n\t"
00351 "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
00352 "movq (%1, %%"REG_a"), %%mm4 \n\t"
00353 "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
00354 "pmullw %%mm6, %%mm4 \n\t"
00355 "pmullw %%mm6, %%mm5 \n\t"
00356 "pxor %%mm2, %%mm2 \n\t"
00357 "pxor %%mm3, %%mm3 \n\t"
00358 "pcmpgtw %%mm0, %%mm2 \n\t"
00359 "pcmpgtw %%mm1, %%mm3 \n\t"
00360 "pxor %%mm2, %%mm0 \n\t"
00361 "pxor %%mm3, %%mm1 \n\t"
00362 "psubw %%mm2, %%mm0 \n\t"
00363 "psubw %%mm3, %%mm1 \n\t"
00364 "pmullw %%mm4, %%mm0 \n\t"
00365 "pmullw %%mm5, %%mm1 \n\t"
00366 "pxor %%mm4, %%mm4 \n\t"
00367 "pxor %%mm5, %%mm5 \n\t"
00368 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t"
00369 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t"
00370 "psraw $3, %%mm0 \n\t"
00371 "psraw $3, %%mm1 \n\t"
00372 "pxor %%mm2, %%mm0 \n\t"
00373 "pxor %%mm3, %%mm1 \n\t"
00374 "psubw %%mm2, %%mm0 \n\t"
00375 "psubw %%mm3, %%mm1 \n\t"
00376 "pandn %%mm0, %%mm4 \n\t"
00377 "pandn %%mm1, %%mm5 \n\t"
00378 "movq %%mm4, (%0, %%"REG_a") \n\t"
00379 "movq %%mm5, 8(%0, %%"REG_a") \n\t"
00380
00381 "add $16, %%"REG_a" \n\t"
00382 "jng 1b \n\t"
00383 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs)
00384 : "%"REG_a, "memory"
00385 );
00386 block[0]= block0;
00387
00388 }
00389
00390 static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,
00391 DCTELEM *block, int n, int qscale)
00392 {
00393 x86_reg nCoeffs;
00394 const uint16_t *quant_matrix;
00395
00396 av_assert2(s->block_last_index[n]>=0);
00397
00398 if(s->alternate_scan) nCoeffs= 63;
00399 else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
00400
00401 quant_matrix = s->inter_matrix;
00402 __asm__ volatile(
00403 "pcmpeqw %%mm7, %%mm7 \n\t"
00404 "psrlq $48, %%mm7 \n\t"
00405 "movd %2, %%mm6 \n\t"
00406 "packssdw %%mm6, %%mm6 \n\t"
00407 "packssdw %%mm6, %%mm6 \n\t"
00408 "mov %3, %%"REG_a" \n\t"
00409 ".p2align 4 \n\t"
00410 "1: \n\t"
00411 "movq (%0, %%"REG_a"), %%mm0 \n\t"
00412 "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
00413 "movq (%1, %%"REG_a"), %%mm4 \n\t"
00414 "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
00415 "pmullw %%mm6, %%mm4 \n\t"
00416 "pmullw %%mm6, %%mm5 \n\t"
00417 "pxor %%mm2, %%mm2 \n\t"
00418 "pxor %%mm3, %%mm3 \n\t"
00419 "pcmpgtw %%mm0, %%mm2 \n\t"
00420 "pcmpgtw %%mm1, %%mm3 \n\t"
00421 "pxor %%mm2, %%mm0 \n\t"
00422 "pxor %%mm3, %%mm1 \n\t"
00423 "psubw %%mm2, %%mm0 \n\t"
00424 "psubw %%mm3, %%mm1 \n\t"
00425 "paddw %%mm0, %%mm0 \n\t"
00426 "paddw %%mm1, %%mm1 \n\t"
00427 "pmullw %%mm4, %%mm0 \n\t"
00428 "pmullw %%mm5, %%mm1 \n\t"
00429 "paddw %%mm4, %%mm0 \n\t"
00430 "paddw %%mm5, %%mm1 \n\t"
00431 "pxor %%mm4, %%mm4 \n\t"
00432 "pxor %%mm5, %%mm5 \n\t"
00433 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t"
00434 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t"
00435 "psrlw $4, %%mm0 \n\t"
00436 "psrlw $4, %%mm1 \n\t"
00437 "pxor %%mm2, %%mm0 \n\t"
00438 "pxor %%mm3, %%mm1 \n\t"
00439 "psubw %%mm2, %%mm0 \n\t"
00440 "psubw %%mm3, %%mm1 \n\t"
00441 "pandn %%mm0, %%mm4 \n\t"
00442 "pandn %%mm1, %%mm5 \n\t"
00443 "pxor %%mm4, %%mm7 \n\t"
00444 "pxor %%mm5, %%mm7 \n\t"
00445 "movq %%mm4, (%0, %%"REG_a") \n\t"
00446 "movq %%mm5, 8(%0, %%"REG_a") \n\t"
00447
00448 "add $16, %%"REG_a" \n\t"
00449 "jng 1b \n\t"
00450 "movd 124(%0, %3), %%mm0 \n\t"
00451 "movq %%mm7, %%mm6 \n\t"
00452 "psrlq $32, %%mm7 \n\t"
00453 "pxor %%mm6, %%mm7 \n\t"
00454 "movq %%mm7, %%mm6 \n\t"
00455 "psrlq $16, %%mm7 \n\t"
00456 "pxor %%mm6, %%mm7 \n\t"
00457 "pslld $31, %%mm7 \n\t"
00458 "psrlq $15, %%mm7 \n\t"
00459 "pxor %%mm7, %%mm0 \n\t"
00460 "movd %%mm0, 124(%0, %3) \n\t"
00461
00462 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "r" (-2*nCoeffs)
00463 : "%"REG_a, "memory"
00464 );
00465 }
00466
00467 static void denoise_dct_mmx(MpegEncContext *s, DCTELEM *block){
00468 const int intra= s->mb_intra;
00469 int *sum= s->dct_error_sum[intra];
00470 uint16_t *offset= s->dct_offset[intra];
00471
00472 s->dct_count[intra]++;
00473
00474 __asm__ volatile(
00475 "pxor %%mm7, %%mm7 \n\t"
00476 "1: \n\t"
00477 "pxor %%mm0, %%mm0 \n\t"
00478 "pxor %%mm1, %%mm1 \n\t"
00479 "movq (%0), %%mm2 \n\t"
00480 "movq 8(%0), %%mm3 \n\t"
00481 "pcmpgtw %%mm2, %%mm0 \n\t"
00482 "pcmpgtw %%mm3, %%mm1 \n\t"
00483 "pxor %%mm0, %%mm2 \n\t"
00484 "pxor %%mm1, %%mm3 \n\t"
00485 "psubw %%mm0, %%mm2 \n\t"
00486 "psubw %%mm1, %%mm3 \n\t"
00487 "movq %%mm2, %%mm4 \n\t"
00488 "movq %%mm3, %%mm5 \n\t"
00489 "psubusw (%2), %%mm2 \n\t"
00490 "psubusw 8(%2), %%mm3 \n\t"
00491 "pxor %%mm0, %%mm2 \n\t"
00492 "pxor %%mm1, %%mm3 \n\t"
00493 "psubw %%mm0, %%mm2 \n\t"
00494 "psubw %%mm1, %%mm3 \n\t"
00495 "movq %%mm2, (%0) \n\t"
00496 "movq %%mm3, 8(%0) \n\t"
00497 "movq %%mm4, %%mm2 \n\t"
00498 "movq %%mm5, %%mm3 \n\t"
00499 "punpcklwd %%mm7, %%mm4 \n\t"
00500 "punpckhwd %%mm7, %%mm2 \n\t"
00501 "punpcklwd %%mm7, %%mm5 \n\t"
00502 "punpckhwd %%mm7, %%mm3 \n\t"
00503 "paddd (%1), %%mm4 \n\t"
00504 "paddd 8(%1), %%mm2 \n\t"
00505 "paddd 16(%1), %%mm5 \n\t"
00506 "paddd 24(%1), %%mm3 \n\t"
00507 "movq %%mm4, (%1) \n\t"
00508 "movq %%mm2, 8(%1) \n\t"
00509 "movq %%mm5, 16(%1) \n\t"
00510 "movq %%mm3, 24(%1) \n\t"
00511 "add $16, %0 \n\t"
00512 "add $32, %1 \n\t"
00513 "add $16, %2 \n\t"
00514 "cmp %3, %0 \n\t"
00515 " jb 1b \n\t"
00516 : "+r" (block), "+r" (sum), "+r" (offset)
00517 : "r"(block+64)
00518 );
00519 }
00520
00521 static void denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){
00522 const int intra= s->mb_intra;
00523 int *sum= s->dct_error_sum[intra];
00524 uint16_t *offset= s->dct_offset[intra];
00525
00526 s->dct_count[intra]++;
00527
00528 __asm__ volatile(
00529 "pxor %%xmm7, %%xmm7 \n\t"
00530 "1: \n\t"
00531 "pxor %%xmm0, %%xmm0 \n\t"
00532 "pxor %%xmm1, %%xmm1 \n\t"
00533 "movdqa (%0), %%xmm2 \n\t"
00534 "movdqa 16(%0), %%xmm3 \n\t"
00535 "pcmpgtw %%xmm2, %%xmm0 \n\t"
00536 "pcmpgtw %%xmm3, %%xmm1 \n\t"
00537 "pxor %%xmm0, %%xmm2 \n\t"
00538 "pxor %%xmm1, %%xmm3 \n\t"
00539 "psubw %%xmm0, %%xmm2 \n\t"
00540 "psubw %%xmm1, %%xmm3 \n\t"
00541 "movdqa %%xmm2, %%xmm4 \n\t"
00542 "movdqa %%xmm3, %%xmm5 \n\t"
00543 "psubusw (%2), %%xmm2 \n\t"
00544 "psubusw 16(%2), %%xmm3 \n\t"
00545 "pxor %%xmm0, %%xmm2 \n\t"
00546 "pxor %%xmm1, %%xmm3 \n\t"
00547 "psubw %%xmm0, %%xmm2 \n\t"
00548 "psubw %%xmm1, %%xmm3 \n\t"
00549 "movdqa %%xmm2, (%0) \n\t"
00550 "movdqa %%xmm3, 16(%0) \n\t"
00551 "movdqa %%xmm4, %%xmm6 \n\t"
00552 "movdqa %%xmm5, %%xmm0 \n\t"
00553 "punpcklwd %%xmm7, %%xmm4 \n\t"
00554 "punpckhwd %%xmm7, %%xmm6 \n\t"
00555 "punpcklwd %%xmm7, %%xmm5 \n\t"
00556 "punpckhwd %%xmm7, %%xmm0 \n\t"
00557 "paddd (%1), %%xmm4 \n\t"
00558 "paddd 16(%1), %%xmm6 \n\t"
00559 "paddd 32(%1), %%xmm5 \n\t"
00560 "paddd 48(%1), %%xmm0 \n\t"
00561 "movdqa %%xmm4, (%1) \n\t"
00562 "movdqa %%xmm6, 16(%1) \n\t"
00563 "movdqa %%xmm5, 32(%1) \n\t"
00564 "movdqa %%xmm0, 48(%1) \n\t"
00565 "add $32, %0 \n\t"
00566 "add $64, %1 \n\t"
00567 "add $32, %2 \n\t"
00568 "cmp %3, %0 \n\t"
00569 " jb 1b \n\t"
00570 : "+r" (block), "+r" (sum), "+r" (offset)
00571 : "r"(block+64)
00572 XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3",
00573 "%xmm4", "%xmm5", "%xmm6", "%xmm7")
00574 );
00575 }
00576
00577 #endif
00578
00579 void ff_MPV_common_init_x86(MpegEncContext *s)
00580 {
00581 #if HAVE_INLINE_ASM
00582 int mm_flags = av_get_cpu_flags();
00583
00584 if (mm_flags & AV_CPU_FLAG_MMX) {
00585 s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_mmx;
00586 s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_mmx;
00587 s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_mmx;
00588 s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_mmx;
00589 if(!(s->flags & CODEC_FLAG_BITEXACT))
00590 s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx;
00591 s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx;
00592
00593 if (mm_flags & AV_CPU_FLAG_SSE2) {
00594 s->denoise_dct= denoise_dct_sse2;
00595 } else {
00596 s->denoise_dct= denoise_dct_mmx;
00597 }
00598 }
00599 #endif
00600 }