00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include "libavutil/cpu.h"
00023 #include "libavutil/x86/asm.h"
00024 #include "libavcodec/avcodec.h"
00025 #include "libavcodec/dsputil.h"
00026 #include "libavcodec/mpegvideo.h"
00027 #include "dsputil_mmx.h"
00028
00029 #if HAVE_INLINE_ASM
00030
00031 static void dct_unquantize_h263_intra_mmx(MpegEncContext *s,
00032 DCTELEM *block, int n, int qscale)
00033 {
00034 x86_reg level, qmul, qadd, nCoeffs;
00035
00036 qmul = qscale << 1;
00037
00038 assert(s->block_last_index[n]>=0 || s->h263_aic);
00039
00040 if (!s->h263_aic) {
00041 if (n < 4)
00042 level = block[0] * s->y_dc_scale;
00043 else
00044 level = block[0] * s->c_dc_scale;
00045 qadd = (qscale - 1) | 1;
00046 }else{
00047 qadd = 0;
00048 level= block[0];
00049 }
00050 if(s->ac_pred)
00051 nCoeffs=63;
00052 else
00053 nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
00054
00055 __asm__ volatile(
00056 "movd %1, %%mm6 \n\t"
00057 "packssdw %%mm6, %%mm6 \n\t"
00058 "packssdw %%mm6, %%mm6 \n\t"
00059 "movd %2, %%mm5 \n\t"
00060 "pxor %%mm7, %%mm7 \n\t"
00061 "packssdw %%mm5, %%mm5 \n\t"
00062 "packssdw %%mm5, %%mm5 \n\t"
00063 "psubw %%mm5, %%mm7 \n\t"
00064 "pxor %%mm4, %%mm4 \n\t"
00065 ".p2align 4 \n\t"
00066 "1: \n\t"
00067 "movq (%0, %3), %%mm0 \n\t"
00068 "movq 8(%0, %3), %%mm1 \n\t"
00069
00070 "pmullw %%mm6, %%mm0 \n\t"
00071 "pmullw %%mm6, %%mm1 \n\t"
00072
00073 "movq (%0, %3), %%mm2 \n\t"
00074 "movq 8(%0, %3), %%mm3 \n\t"
00075
00076 "pcmpgtw %%mm4, %%mm2 \n\t"
00077 "pcmpgtw %%mm4, %%mm3 \n\t"
00078
00079 "pxor %%mm2, %%mm0 \n\t"
00080 "pxor %%mm3, %%mm1 \n\t"
00081
00082 "paddw %%mm7, %%mm0 \n\t"
00083 "paddw %%mm7, %%mm1 \n\t"
00084
00085 "pxor %%mm0, %%mm2 \n\t"
00086 "pxor %%mm1, %%mm3 \n\t"
00087
00088 "pcmpeqw %%mm7, %%mm0 \n\t"
00089 "pcmpeqw %%mm7, %%mm1 \n\t"
00090
00091 "pandn %%mm2, %%mm0 \n\t"
00092 "pandn %%mm3, %%mm1 \n\t"
00093
00094 "movq %%mm0, (%0, %3) \n\t"
00095 "movq %%mm1, 8(%0, %3) \n\t"
00096
00097 "add $16, %3 \n\t"
00098 "jng 1b \n\t"
00099 ::"r" (block+nCoeffs), "rm"(qmul), "rm" (qadd), "r" (2*(-nCoeffs))
00100 : "memory"
00101 );
00102 block[0]= level;
00103 }
00104
00105
00106 static void dct_unquantize_h263_inter_mmx(MpegEncContext *s,
00107 DCTELEM *block, int n, int qscale)
00108 {
00109 x86_reg qmul, qadd, nCoeffs;
00110
00111 qmul = qscale << 1;
00112 qadd = (qscale - 1) | 1;
00113
00114 assert(s->block_last_index[n]>=0 || s->h263_aic);
00115
00116 nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
00117
00118 __asm__ volatile(
00119 "movd %1, %%mm6 \n\t"
00120 "packssdw %%mm6, %%mm6 \n\t"
00121 "packssdw %%mm6, %%mm6 \n\t"
00122 "movd %2, %%mm5 \n\t"
00123 "pxor %%mm7, %%mm7 \n\t"
00124 "packssdw %%mm5, %%mm5 \n\t"
00125 "packssdw %%mm5, %%mm5 \n\t"
00126 "psubw %%mm5, %%mm7 \n\t"
00127 "pxor %%mm4, %%mm4 \n\t"
00128 ".p2align 4 \n\t"
00129 "1: \n\t"
00130 "movq (%0, %3), %%mm0 \n\t"
00131 "movq 8(%0, %3), %%mm1 \n\t"
00132
00133 "pmullw %%mm6, %%mm0 \n\t"
00134 "pmullw %%mm6, %%mm1 \n\t"
00135
00136 "movq (%0, %3), %%mm2 \n\t"
00137 "movq 8(%0, %3), %%mm3 \n\t"
00138
00139 "pcmpgtw %%mm4, %%mm2 \n\t"
00140 "pcmpgtw %%mm4, %%mm3 \n\t"
00141
00142 "pxor %%mm2, %%mm0 \n\t"
00143 "pxor %%mm3, %%mm1 \n\t"
00144
00145 "paddw %%mm7, %%mm0 \n\t"
00146 "paddw %%mm7, %%mm1 \n\t"
00147
00148 "pxor %%mm0, %%mm2 \n\t"
00149 "pxor %%mm1, %%mm3 \n\t"
00150
00151 "pcmpeqw %%mm7, %%mm0 \n\t"
00152 "pcmpeqw %%mm7, %%mm1 \n\t"
00153
00154 "pandn %%mm2, %%mm0 \n\t"
00155 "pandn %%mm3, %%mm1 \n\t"
00156
00157 "movq %%mm0, (%0, %3) \n\t"
00158 "movq %%mm1, 8(%0, %3) \n\t"
00159
00160 "add $16, %3 \n\t"
00161 "jng 1b \n\t"
00162 ::"r" (block+nCoeffs), "rm"(qmul), "rm" (qadd), "r" (2*(-nCoeffs))
00163 : "memory"
00164 );
00165 }
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197 static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s,
00198 DCTELEM *block, int n, int qscale)
00199 {
00200 x86_reg nCoeffs;
00201 const uint16_t *quant_matrix;
00202 int block0;
00203
00204 assert(s->block_last_index[n]>=0);
00205
00206 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
00207
00208 if (n < 4)
00209 block0 = block[0] * s->y_dc_scale;
00210 else
00211 block0 = block[0] * s->c_dc_scale;
00212
00213 quant_matrix = s->intra_matrix;
00214 __asm__ volatile(
00215 "pcmpeqw %%mm7, %%mm7 \n\t"
00216 "psrlw $15, %%mm7 \n\t"
00217 "movd %2, %%mm6 \n\t"
00218 "packssdw %%mm6, %%mm6 \n\t"
00219 "packssdw %%mm6, %%mm6 \n\t"
00220 "mov %3, %%"REG_a" \n\t"
00221 ".p2align 4 \n\t"
00222 "1: \n\t"
00223 "movq (%0, %%"REG_a"), %%mm0 \n\t"
00224 "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
00225 "movq (%1, %%"REG_a"), %%mm4 \n\t"
00226 "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
00227 "pmullw %%mm6, %%mm4 \n\t"
00228 "pmullw %%mm6, %%mm5 \n\t"
00229 "pxor %%mm2, %%mm2 \n\t"
00230 "pxor %%mm3, %%mm3 \n\t"
00231 "pcmpgtw %%mm0, %%mm2 \n\t"
00232 "pcmpgtw %%mm1, %%mm3 \n\t"
00233 "pxor %%mm2, %%mm0 \n\t"
00234 "pxor %%mm3, %%mm1 \n\t"
00235 "psubw %%mm2, %%mm0 \n\t"
00236 "psubw %%mm3, %%mm1 \n\t"
00237 "pmullw %%mm4, %%mm0 \n\t"
00238 "pmullw %%mm5, %%mm1 \n\t"
00239 "pxor %%mm4, %%mm4 \n\t"
00240 "pxor %%mm5, %%mm5 \n\t"
00241 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t"
00242 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t"
00243 "psraw $3, %%mm0 \n\t"
00244 "psraw $3, %%mm1 \n\t"
00245 "psubw %%mm7, %%mm0 \n\t"
00246 "psubw %%mm7, %%mm1 \n\t"
00247 "por %%mm7, %%mm0 \n\t"
00248 "por %%mm7, %%mm1 \n\t"
00249 "pxor %%mm2, %%mm0 \n\t"
00250 "pxor %%mm3, %%mm1 \n\t"
00251 "psubw %%mm2, %%mm0 \n\t"
00252 "psubw %%mm3, %%mm1 \n\t"
00253 "pandn %%mm0, %%mm4 \n\t"
00254 "pandn %%mm1, %%mm5 \n\t"
00255 "movq %%mm4, (%0, %%"REG_a") \n\t"
00256 "movq %%mm5, 8(%0, %%"REG_a") \n\t"
00257
00258 "add $16, %%"REG_a" \n\t"
00259 "js 1b \n\t"
00260 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs)
00261 : "%"REG_a, "memory"
00262 );
00263 block[0]= block0;
00264 }
00265
00266 static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s,
00267 DCTELEM *block, int n, int qscale)
00268 {
00269 x86_reg nCoeffs;
00270 const uint16_t *quant_matrix;
00271
00272 assert(s->block_last_index[n]>=0);
00273
00274 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
00275
00276 quant_matrix = s->inter_matrix;
00277 __asm__ volatile(
00278 "pcmpeqw %%mm7, %%mm7 \n\t"
00279 "psrlw $15, %%mm7 \n\t"
00280 "movd %2, %%mm6 \n\t"
00281 "packssdw %%mm6, %%mm6 \n\t"
00282 "packssdw %%mm6, %%mm6 \n\t"
00283 "mov %3, %%"REG_a" \n\t"
00284 ".p2align 4 \n\t"
00285 "1: \n\t"
00286 "movq (%0, %%"REG_a"), %%mm0 \n\t"
00287 "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
00288 "movq (%1, %%"REG_a"), %%mm4 \n\t"
00289 "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
00290 "pmullw %%mm6, %%mm4 \n\t"
00291 "pmullw %%mm6, %%mm5 \n\t"
00292 "pxor %%mm2, %%mm2 \n\t"
00293 "pxor %%mm3, %%mm3 \n\t"
00294 "pcmpgtw %%mm0, %%mm2 \n\t"
00295 "pcmpgtw %%mm1, %%mm3 \n\t"
00296 "pxor %%mm2, %%mm0 \n\t"
00297 "pxor %%mm3, %%mm1 \n\t"
00298 "psubw %%mm2, %%mm0 \n\t"
00299 "psubw %%mm3, %%mm1 \n\t"
00300 "paddw %%mm0, %%mm0 \n\t"
00301 "paddw %%mm1, %%mm1 \n\t"
00302 "paddw %%mm7, %%mm0 \n\t"
00303 "paddw %%mm7, %%mm1 \n\t"
00304 "pmullw %%mm4, %%mm0 \n\t"
00305 "pmullw %%mm5, %%mm1 \n\t"
00306 "pxor %%mm4, %%mm4 \n\t"
00307 "pxor %%mm5, %%mm5 \n\t"
00308 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t"
00309 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t"
00310 "psraw $4, %%mm0 \n\t"
00311 "psraw $4, %%mm1 \n\t"
00312 "psubw %%mm7, %%mm0 \n\t"
00313 "psubw %%mm7, %%mm1 \n\t"
00314 "por %%mm7, %%mm0 \n\t"
00315 "por %%mm7, %%mm1 \n\t"
00316 "pxor %%mm2, %%mm0 \n\t"
00317 "pxor %%mm3, %%mm1 \n\t"
00318 "psubw %%mm2, %%mm0 \n\t"
00319 "psubw %%mm3, %%mm1 \n\t"
00320 "pandn %%mm0, %%mm4 \n\t"
00321 "pandn %%mm1, %%mm5 \n\t"
00322 "movq %%mm4, (%0, %%"REG_a") \n\t"
00323 "movq %%mm5, 8(%0, %%"REG_a") \n\t"
00324
00325 "add $16, %%"REG_a" \n\t"
00326 "js 1b \n\t"
00327 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs)
00328 : "%"REG_a, "memory"
00329 );
00330 }
00331
00332 static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
00333 DCTELEM *block, int n, int qscale)
00334 {
00335 x86_reg nCoeffs;
00336 const uint16_t *quant_matrix;
00337 int block0;
00338
00339 assert(s->block_last_index[n]>=0);
00340
00341 if(s->alternate_scan) nCoeffs= 63;
00342 else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
00343
00344 if (n < 4)
00345 block0 = block[0] * s->y_dc_scale;
00346 else
00347 block0 = block[0] * s->c_dc_scale;
00348 quant_matrix = s->intra_matrix;
00349 __asm__ volatile(
00350 "pcmpeqw %%mm7, %%mm7 \n\t"
00351 "psrlw $15, %%mm7 \n\t"
00352 "movd %2, %%mm6 \n\t"
00353 "packssdw %%mm6, %%mm6 \n\t"
00354 "packssdw %%mm6, %%mm6 \n\t"
00355 "mov %3, %%"REG_a" \n\t"
00356 ".p2align 4 \n\t"
00357 "1: \n\t"
00358 "movq (%0, %%"REG_a"), %%mm0 \n\t"
00359 "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
00360 "movq (%1, %%"REG_a"), %%mm4 \n\t"
00361 "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
00362 "pmullw %%mm6, %%mm4 \n\t"
00363 "pmullw %%mm6, %%mm5 \n\t"
00364 "pxor %%mm2, %%mm2 \n\t"
00365 "pxor %%mm3, %%mm3 \n\t"
00366 "pcmpgtw %%mm0, %%mm2 \n\t"
00367 "pcmpgtw %%mm1, %%mm3 \n\t"
00368 "pxor %%mm2, %%mm0 \n\t"
00369 "pxor %%mm3, %%mm1 \n\t"
00370 "psubw %%mm2, %%mm0 \n\t"
00371 "psubw %%mm3, %%mm1 \n\t"
00372 "pmullw %%mm4, %%mm0 \n\t"
00373 "pmullw %%mm5, %%mm1 \n\t"
00374 "pxor %%mm4, %%mm4 \n\t"
00375 "pxor %%mm5, %%mm5 \n\t"
00376 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t"
00377 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t"
00378 "psraw $3, %%mm0 \n\t"
00379 "psraw $3, %%mm1 \n\t"
00380 "pxor %%mm2, %%mm0 \n\t"
00381 "pxor %%mm3, %%mm1 \n\t"
00382 "psubw %%mm2, %%mm0 \n\t"
00383 "psubw %%mm3, %%mm1 \n\t"
00384 "pandn %%mm0, %%mm4 \n\t"
00385 "pandn %%mm1, %%mm5 \n\t"
00386 "movq %%mm4, (%0, %%"REG_a") \n\t"
00387 "movq %%mm5, 8(%0, %%"REG_a") \n\t"
00388
00389 "add $16, %%"REG_a" \n\t"
00390 "jng 1b \n\t"
00391 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs)
00392 : "%"REG_a, "memory"
00393 );
00394 block[0]= block0;
00395
00396 }
00397
00398 static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,
00399 DCTELEM *block, int n, int qscale)
00400 {
00401 x86_reg nCoeffs;
00402 const uint16_t *quant_matrix;
00403
00404 assert(s->block_last_index[n]>=0);
00405
00406 if(s->alternate_scan) nCoeffs= 63;
00407 else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
00408
00409 quant_matrix = s->inter_matrix;
00410 __asm__ volatile(
00411 "pcmpeqw %%mm7, %%mm7 \n\t"
00412 "psrlq $48, %%mm7 \n\t"
00413 "movd %2, %%mm6 \n\t"
00414 "packssdw %%mm6, %%mm6 \n\t"
00415 "packssdw %%mm6, %%mm6 \n\t"
00416 "mov %3, %%"REG_a" \n\t"
00417 ".p2align 4 \n\t"
00418 "1: \n\t"
00419 "movq (%0, %%"REG_a"), %%mm0 \n\t"
00420 "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
00421 "movq (%1, %%"REG_a"), %%mm4 \n\t"
00422 "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
00423 "pmullw %%mm6, %%mm4 \n\t"
00424 "pmullw %%mm6, %%mm5 \n\t"
00425 "pxor %%mm2, %%mm2 \n\t"
00426 "pxor %%mm3, %%mm3 \n\t"
00427 "pcmpgtw %%mm0, %%mm2 \n\t"
00428 "pcmpgtw %%mm1, %%mm3 \n\t"
00429 "pxor %%mm2, %%mm0 \n\t"
00430 "pxor %%mm3, %%mm1 \n\t"
00431 "psubw %%mm2, %%mm0 \n\t"
00432 "psubw %%mm3, %%mm1 \n\t"
00433 "paddw %%mm0, %%mm0 \n\t"
00434 "paddw %%mm1, %%mm1 \n\t"
00435 "pmullw %%mm4, %%mm0 \n\t"
00436 "pmullw %%mm5, %%mm1 \n\t"
00437 "paddw %%mm4, %%mm0 \n\t"
00438 "paddw %%mm5, %%mm1 \n\t"
00439 "pxor %%mm4, %%mm4 \n\t"
00440 "pxor %%mm5, %%mm5 \n\t"
00441 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t"
00442 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t"
00443 "psrlw $4, %%mm0 \n\t"
00444 "psrlw $4, %%mm1 \n\t"
00445 "pxor %%mm2, %%mm0 \n\t"
00446 "pxor %%mm3, %%mm1 \n\t"
00447 "psubw %%mm2, %%mm0 \n\t"
00448 "psubw %%mm3, %%mm1 \n\t"
00449 "pandn %%mm0, %%mm4 \n\t"
00450 "pandn %%mm1, %%mm5 \n\t"
00451 "pxor %%mm4, %%mm7 \n\t"
00452 "pxor %%mm5, %%mm7 \n\t"
00453 "movq %%mm4, (%0, %%"REG_a") \n\t"
00454 "movq %%mm5, 8(%0, %%"REG_a") \n\t"
00455
00456 "add $16, %%"REG_a" \n\t"
00457 "jng 1b \n\t"
00458 "movd 124(%0, %3), %%mm0 \n\t"
00459 "movq %%mm7, %%mm6 \n\t"
00460 "psrlq $32, %%mm7 \n\t"
00461 "pxor %%mm6, %%mm7 \n\t"
00462 "movq %%mm7, %%mm6 \n\t"
00463 "psrlq $16, %%mm7 \n\t"
00464 "pxor %%mm6, %%mm7 \n\t"
00465 "pslld $31, %%mm7 \n\t"
00466 "psrlq $15, %%mm7 \n\t"
00467 "pxor %%mm7, %%mm0 \n\t"
00468 "movd %%mm0, 124(%0, %3) \n\t"
00469
00470 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "r" (-2*nCoeffs)
00471 : "%"REG_a, "memory"
00472 );
00473 }
00474
00475 static void denoise_dct_mmx(MpegEncContext *s, DCTELEM *block){
00476 const int intra= s->mb_intra;
00477 int *sum= s->dct_error_sum[intra];
00478 uint16_t *offset= s->dct_offset[intra];
00479
00480 s->dct_count[intra]++;
00481
00482 __asm__ volatile(
00483 "pxor %%mm7, %%mm7 \n\t"
00484 "1: \n\t"
00485 "pxor %%mm0, %%mm0 \n\t"
00486 "pxor %%mm1, %%mm1 \n\t"
00487 "movq (%0), %%mm2 \n\t"
00488 "movq 8(%0), %%mm3 \n\t"
00489 "pcmpgtw %%mm2, %%mm0 \n\t"
00490 "pcmpgtw %%mm3, %%mm1 \n\t"
00491 "pxor %%mm0, %%mm2 \n\t"
00492 "pxor %%mm1, %%mm3 \n\t"
00493 "psubw %%mm0, %%mm2 \n\t"
00494 "psubw %%mm1, %%mm3 \n\t"
00495 "movq %%mm2, %%mm4 \n\t"
00496 "movq %%mm3, %%mm5 \n\t"
00497 "psubusw (%2), %%mm2 \n\t"
00498 "psubusw 8(%2), %%mm3 \n\t"
00499 "pxor %%mm0, %%mm2 \n\t"
00500 "pxor %%mm1, %%mm3 \n\t"
00501 "psubw %%mm0, %%mm2 \n\t"
00502 "psubw %%mm1, %%mm3 \n\t"
00503 "movq %%mm2, (%0) \n\t"
00504 "movq %%mm3, 8(%0) \n\t"
00505 "movq %%mm4, %%mm2 \n\t"
00506 "movq %%mm5, %%mm3 \n\t"
00507 "punpcklwd %%mm7, %%mm4 \n\t"
00508 "punpckhwd %%mm7, %%mm2 \n\t"
00509 "punpcklwd %%mm7, %%mm5 \n\t"
00510 "punpckhwd %%mm7, %%mm3 \n\t"
00511 "paddd (%1), %%mm4 \n\t"
00512 "paddd 8(%1), %%mm2 \n\t"
00513 "paddd 16(%1), %%mm5 \n\t"
00514 "paddd 24(%1), %%mm3 \n\t"
00515 "movq %%mm4, (%1) \n\t"
00516 "movq %%mm2, 8(%1) \n\t"
00517 "movq %%mm5, 16(%1) \n\t"
00518 "movq %%mm3, 24(%1) \n\t"
00519 "add $16, %0 \n\t"
00520 "add $32, %1 \n\t"
00521 "add $16, %2 \n\t"
00522 "cmp %3, %0 \n\t"
00523 " jb 1b \n\t"
00524 : "+r" (block), "+r" (sum), "+r" (offset)
00525 : "r"(block+64)
00526 );
00527 }
00528
00529 static void denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){
00530 const int intra= s->mb_intra;
00531 int *sum= s->dct_error_sum[intra];
00532 uint16_t *offset= s->dct_offset[intra];
00533
00534 s->dct_count[intra]++;
00535
00536 __asm__ volatile(
00537 "pxor %%xmm7, %%xmm7 \n\t"
00538 "1: \n\t"
00539 "pxor %%xmm0, %%xmm0 \n\t"
00540 "pxor %%xmm1, %%xmm1 \n\t"
00541 "movdqa (%0), %%xmm2 \n\t"
00542 "movdqa 16(%0), %%xmm3 \n\t"
00543 "pcmpgtw %%xmm2, %%xmm0 \n\t"
00544 "pcmpgtw %%xmm3, %%xmm1 \n\t"
00545 "pxor %%xmm0, %%xmm2 \n\t"
00546 "pxor %%xmm1, %%xmm3 \n\t"
00547 "psubw %%xmm0, %%xmm2 \n\t"
00548 "psubw %%xmm1, %%xmm3 \n\t"
00549 "movdqa %%xmm2, %%xmm4 \n\t"
00550 "movdqa %%xmm3, %%xmm5 \n\t"
00551 "psubusw (%2), %%xmm2 \n\t"
00552 "psubusw 16(%2), %%xmm3 \n\t"
00553 "pxor %%xmm0, %%xmm2 \n\t"
00554 "pxor %%xmm1, %%xmm3 \n\t"
00555 "psubw %%xmm0, %%xmm2 \n\t"
00556 "psubw %%xmm1, %%xmm3 \n\t"
00557 "movdqa %%xmm2, (%0) \n\t"
00558 "movdqa %%xmm3, 16(%0) \n\t"
00559 "movdqa %%xmm4, %%xmm6 \n\t"
00560 "movdqa %%xmm5, %%xmm0 \n\t"
00561 "punpcklwd %%xmm7, %%xmm4 \n\t"
00562 "punpckhwd %%xmm7, %%xmm6 \n\t"
00563 "punpcklwd %%xmm7, %%xmm5 \n\t"
00564 "punpckhwd %%xmm7, %%xmm0 \n\t"
00565 "paddd (%1), %%xmm4 \n\t"
00566 "paddd 16(%1), %%xmm6 \n\t"
00567 "paddd 32(%1), %%xmm5 \n\t"
00568 "paddd 48(%1), %%xmm0 \n\t"
00569 "movdqa %%xmm4, (%1) \n\t"
00570 "movdqa %%xmm6, 16(%1) \n\t"
00571 "movdqa %%xmm5, 32(%1) \n\t"
00572 "movdqa %%xmm0, 48(%1) \n\t"
00573 "add $32, %0 \n\t"
00574 "add $64, %1 \n\t"
00575 "add $32, %2 \n\t"
00576 "cmp %3, %0 \n\t"
00577 " jb 1b \n\t"
00578 : "+r" (block), "+r" (sum), "+r" (offset)
00579 : "r"(block+64)
00580 XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3",
00581 "%xmm4", "%xmm5", "%xmm6", "%xmm7")
00582 );
00583 }
00584
00585 #endif
00586
00587 void ff_MPV_common_init_x86(MpegEncContext *s)
00588 {
00589 #if HAVE_INLINE_ASM
00590 int mm_flags = av_get_cpu_flags();
00591
00592 if (mm_flags & AV_CPU_FLAG_MMX) {
00593 s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_mmx;
00594 s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_mmx;
00595 s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_mmx;
00596 s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_mmx;
00597 if(!(s->flags & CODEC_FLAG_BITEXACT))
00598 s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx;
00599 s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx;
00600
00601 if (mm_flags & AV_CPU_FLAG_SSE2) {
00602 s->denoise_dct= denoise_dct_sse2;
00603 } else {
00604 s->denoise_dct= denoise_dct_mmx;
00605 }
00606 }
00607 #endif
00608 }