From dd9765bf4ca9f94a3d44b205c247a84d44f6b8bf Mon Sep 17 00:00:00 2001
From: XhmikosR <xhmikosr@users.sourceforge.net>
Date: Thu, 16 Aug 2012 23:46:54 +0300
Subject: [PATCH] rename the remaining instances of HAVE_MMX2 to HAVE_MMXEXT
---
configure | 1 -
libavfilter/libmpcodecs/libvo/fastmemcpy.h | 2 +-
libavfilter/libmpcodecs/vf_fspp.c | 2 +-
libavfilter/libmpcodecs/vf_noise.c | 6 +-
libavfilter/libmpcodecs/vf_pp7.c | 2 +-
libavfilter/libmpcodecs/vf_spp.c | 2 +-
libavfilter/libmpcodecs/vf_unsharp.c | 2 +-
libavfilter/libmpcodecs/vf_uspp.c | 2 +-
libavutil/utils.c | 1 -
libpostproc/postprocess.c | 22 +++----
libpostproc/postprocess_template.c | 98 +++++++++++++++---------------
libswscale/swscale.c | 2 +-
libswscale/x86/swscale.c | 2 +-
13 files changed, 71 insertions(+), 73 deletions(-)
diff --git a/configure b/configure
index 10973b9..487da66 100755
|
a
|
b
|
cat > $TMPH <<EOF |
| 4075 | 4075 | #define EXTERN_PREFIX "${extern_prefix}" |
| 4076 | 4076 | #define EXTERN_ASM ${extern_prefix} |
| 4077 | 4077 | #define SLIBSUF "$SLIBSUF" |
| 4078 | | #define HAVE_MMX2 HAVE_MMXEXT |
| 4079 | 4078 | EOF |
| 4080 | 4079 | |
| 4081 | 4080 | test -n "$assert_level" && |
diff --git a/libavfilter/libmpcodecs/libvo/fastmemcpy.h b/libavfilter/libmpcodecs/libvo/fastmemcpy.h
index 5a17d01..f1a9127 100644
|
a
|
b
|
|
| 26 | 26 | void * fast_memcpy(void * to, const void * from, size_t len); |
| 27 | 27 | void * mem2agpcpy(void * to, const void * from, size_t len); |
| 28 | 28 | |
| 29 | | #if ! defined(CONFIG_FASTMEMCPY) || ! (HAVE_MMX || HAVE_MMX2 || HAVE_AMD3DNOW /* || HAVE_SSE || HAVE_SSE2 */) |
| | 29 | #if ! defined(CONFIG_FASTMEMCPY) || ! (HAVE_MMX || HAVE_MMXEXT || HAVE_AMD3DNOW /* || HAVE_SSE || HAVE_SSE2 */) |
| 30 | 30 | #define mem2agpcpy(a,b,c) memcpy(a,b,c) |
| 31 | 31 | #define fast_memcpy(a,b,c) memcpy(a,b,c) |
| 32 | 32 | #endif |
diff --git a/libavfilter/libmpcodecs/vf_fspp.c b/libavfilter/libmpcodecs/vf_fspp.c
index 3653187..2ef7e19 100644
|
a
|
b
|
static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts) |
| 566 | 566 | #if HAVE_MMX |
| 567 | 567 | if(gCpuCaps.hasMMX) __asm__ volatile ("emms\n\t"); |
| 568 | 568 | #endif |
| 569 | | #if HAVE_MMX2 |
| | 569 | #if HAVE_MMXEXT |
| 570 | 570 | if(gCpuCaps.hasMMX2) __asm__ volatile ("sfence\n\t"); |
| 571 | 571 | #endif |
| 572 | 572 | return vf_next_put_image(vf,dmpi, pts); |
diff --git a/libavfilter/libmpcodecs/vf_noise.c b/libavfilter/libmpcodecs/vf_noise.c
index 9521619..9218ff8 100644
|
a
|
b
|
static inline void lineNoise_MMX(uint8_t *dst, uint8_t *src, int8_t *noise, int |
| 175 | 175 | #endif |
| 176 | 176 | |
| 177 | 177 | //duplicate of previous except movntq |
| 178 | | #if HAVE_MMX2 |
| | 178 | #if HAVE_MMXEXT |
| 179 | 179 | static inline void lineNoise_MMX2(uint8_t *dst, uint8_t *src, int8_t *noise, int len, int shift){ |
| 180 | 180 | x86_reg mmx_len= len&(~7); |
| 181 | 181 | noise+=shift; |
| … |
… |
static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts){ |
| 360 | 360 | #if HAVE_MMX |
| 361 | 361 | if(gCpuCaps.hasMMX) __asm__ volatile ("emms\n\t"); |
| 362 | 362 | #endif |
| 363 | | #if HAVE_MMX2 |
| | 363 | #if HAVE_MMXEXT |
| 364 | 364 | if(gCpuCaps.hasMMX2) __asm__ volatile ("sfence\n\t"); |
| 365 | 365 | #endif |
| 366 | 366 | |
| … |
… |
static int vf_open(vf_instance_t *vf, char *args){ |
| 454 | 454 | lineNoiseAvg= lineNoiseAvg_MMX; |
| 455 | 455 | } |
| 456 | 456 | #endif |
| 457 | | #if HAVE_MMX2 |
| | 457 | #if HAVE_MMXEXT |
| 458 | 458 | if(gCpuCaps.hasMMX2) lineNoise= lineNoise_MMX2; |
| 459 | 459 | // if(gCpuCaps.hasMMX) lineNoiseAvg= lineNoiseAvg_MMX2; |
| 460 | 460 | #endif |
diff --git a/libavfilter/libmpcodecs/vf_pp7.c b/libavfilter/libmpcodecs/vf_pp7.c
index eae30bf..9bcc7ab 100644
|
a
|
b
|
static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts){ |
| 402 | 402 | #if HAVE_MMX |
| 403 | 403 | if(gCpuCaps.hasMMX) __asm__ volatile ("emms\n\t"); |
| 404 | 404 | #endif |
| 405 | | #if HAVE_MMX2 |
| | 405 | #if HAVE_MMXEXT |
| 406 | 406 | if(gCpuCaps.hasMMX2) __asm__ volatile ("sfence\n\t"); |
| 407 | 407 | #endif |
| 408 | 408 | |
diff --git a/libavfilter/libmpcodecs/vf_spp.c b/libavfilter/libmpcodecs/vf_spp.c
index 0b4b230..e747d74 100644
|
a
|
b
|
static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts){ |
| 510 | 510 | #if HAVE_MMX |
| 511 | 511 | if(gCpuCaps.hasMMX) __asm__ volatile ("emms\n\t"); |
| 512 | 512 | #endif |
| 513 | | #if HAVE_MMX2 |
| | 513 | #if HAVE_MMXEXT |
| 514 | 514 | if(gCpuCaps.hasMMX2) __asm__ volatile ("sfence\n\t"); |
| 515 | 515 | #endif |
| 516 | 516 | |
diff --git a/libavfilter/libmpcodecs/vf_unsharp.c b/libavfilter/libmpcodecs/vf_unsharp.c
index db22f78..75d6e5b 100644
|
a
|
b
|
static int put_image( struct vf_instance *vf, mp_image_t *mpi, double pts) { |
| 196 | 196 | if(gCpuCaps.hasMMX) |
| 197 | 197 | __asm__ volatile ("emms\n\t"); |
| 198 | 198 | #endif |
| 199 | | #if HAVE_MMX2 |
| | 199 | #if HAVE_MMXEXT |
| 200 | 200 | if(gCpuCaps.hasMMX2) |
| 201 | 201 | __asm__ volatile ("sfence\n\t"); |
| 202 | 202 | #endif |
diff --git a/libavfilter/libmpcodecs/vf_uspp.c b/libavfilter/libmpcodecs/vf_uspp.c
index cc9a3db..239f671 100644
|
a
|
b
|
static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts){ |
| 291 | 291 | #if HAVE_MMX |
| 292 | 292 | if(gCpuCaps.hasMMX) __asm__ volatile ("emms\n\t"); |
| 293 | 293 | #endif |
| 294 | | #if HAVE_MMX2 |
| | 294 | #if HAVE_MMXEXT |
| 295 | 295 | if(gCpuCaps.hasMMX2) __asm__ volatile ("sfence\n\t"); |
| 296 | 296 | #endif |
| 297 | 297 | |
diff --git a/libavutil/utils.c b/libavutil/utils.c
index 127c4b1..971b48b 100644
|
a
|
b
|
unsigned avutil_version(void) |
| 33 | 33 | av_assert0(AVMEDIA_TYPE_ATTACHMENT == 4); |
| 34 | 34 | av_assert0(AV_PICTURE_TYPE_BI == 7); |
| 35 | 35 | av_assert0(LIBAVUTIL_VERSION_MICRO >= 100); |
| 36 | | av_assert0(HAVE_MMX2 == HAVE_MMXEXT); |
| 37 | 36 | |
| 38 | 37 | return LIBAVUTIL_VERSION_INT; |
| 39 | 38 | } |
diff --git a/libpostproc/postprocess.c b/libpostproc/postprocess.c
index 2234ba8..3c3f68c 100644
|
a
|
b
|
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks |
| 80 | 80 | #include <stdio.h> |
| 81 | 81 | #include <stdlib.h> |
| 82 | 82 | #include <string.h> |
| 83 | | //#undef HAVE_MMX2 |
| | 83 | //#undef HAVE_MMXEXT |
| 84 | 84 | //#define HAVE_AMD3DNOW |
| 85 | 85 | //#undef HAVE_MMX |
| 86 | 86 | //#undef ARCH_X86 |
| … |
… |
static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, |
| 546 | 546 | |
| 547 | 547 | #if ARCH_X86 |
| 548 | 548 | |
| 549 | | #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT |
| | 549 | #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMXEXT) || CONFIG_RUNTIME_CPUDETECT |
| 550 | 550 | #define COMPILE_MMX |
| 551 | 551 | #endif |
| 552 | 552 | |
| 553 | | #if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT |
| | 553 | #if HAVE_MMXEXT || CONFIG_RUNTIME_CPUDETECT |
| 554 | 554 | #define COMPILE_MMX2 |
| 555 | 555 | #endif |
| 556 | 556 | |
| 557 | | #if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT |
| | 557 | #if (HAVE_AMD3DNOW && !HAVE_MMXEXT) || CONFIG_RUNTIME_CPUDETECT |
| 558 | 558 | #define COMPILE_3DNOW |
| 559 | 559 | #endif |
| 560 | 560 | #endif /* ARCH_X86 */ |
| 561 | 561 | |
| 562 | 562 | #undef HAVE_MMX |
| 563 | 563 | #define HAVE_MMX 0 |
| 564 | | #undef HAVE_MMX2 |
| 565 | | #define HAVE_MMX2 0 |
| | 564 | #undef HAVE_MMXEXT |
| | 565 | #define HAVE_MMXEXT 0 |
| 566 | 566 | #undef HAVE_AMD3DNOW |
| 567 | 567 | #define HAVE_AMD3DNOW 0 |
| 568 | 568 | #undef HAVE_ALTIVEC |
| … |
… |
static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, |
| 595 | 595 | #ifdef COMPILE_MMX2 |
| 596 | 596 | #undef RENAME |
| 597 | 597 | #undef HAVE_MMX |
| 598 | | #undef HAVE_MMX2 |
| | 598 | #undef HAVE_MMXEXT |
| 599 | 599 | #define HAVE_MMX 1 |
| 600 | | #define HAVE_MMX2 1 |
| | 600 | #define HAVE_MMXEXT 1 |
| 601 | 601 | #define RENAME(a) a ## _MMX2 |
| 602 | 602 | #include "postprocess_template.c" |
| 603 | 603 | #endif |
| … |
… |
static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, |
| 606 | 606 | #ifdef COMPILE_3DNOW |
| 607 | 607 | #undef RENAME |
| 608 | 608 | #undef HAVE_MMX |
| 609 | | #undef HAVE_MMX2 |
| | 609 | #undef HAVE_MMXEXT |
| 610 | 610 | #undef HAVE_AMD3DNOW |
| 611 | 611 | #define HAVE_MMX 1 |
| 612 | | #define HAVE_MMX2 0 |
| | 612 | #define HAVE_MMXEXT 0 |
| 613 | 613 | #define HAVE_AMD3DNOW 1 |
| 614 | 614 | #define RENAME(a) a ## _3DNow |
| 615 | 615 | #include "postprocess_template.c" |
| … |
… |
static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[] |
| 652 | 652 | postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 653 | 653 | #endif |
| 654 | 654 | #else /* CONFIG_RUNTIME_CPUDETECT */ |
| 655 | | #if HAVE_MMX2 |
| | 655 | #if HAVE_MMXEXT |
| 656 | 656 | postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
| 657 | 657 | #elif HAVE_AMD3DNOW |
| 658 | 658 | postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c
index 0fb2da4..a6752df 100644
|
a
|
b
|
|
| 30 | 30 | #undef PMINUB |
| 31 | 31 | #undef PMAXUB |
| 32 | 32 | |
| 33 | | #if HAVE_MMX2 |
| | 33 | #if HAVE_MMXEXT |
| 34 | 34 | #define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t" |
| 35 | 35 | #elif HAVE_AMD3DNOW |
| 36 | 36 | #define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" |
| 37 | 37 | #endif |
| 38 | 38 | #define PAVGB(a,b) REAL_PAVGB(a,b) |
| 39 | 39 | |
| 40 | | #if HAVE_MMX2 |
| | 40 | #if HAVE_MMXEXT |
| 41 | 41 | #define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t" |
| 42 | 42 | #elif HAVE_MMX |
| 43 | 43 | #define PMINUB(b,a,t) \ |
| … |
… |
|
| 46 | 46 | "psubb " #t ", " #a " \n\t" |
| 47 | 47 | #endif |
| 48 | 48 | |
| 49 | | #if HAVE_MMX2 |
| | 49 | #if HAVE_MMXEXT |
| 50 | 50 | #define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t" |
| 51 | 51 | #elif HAVE_MMX |
| 52 | 52 | #define PMAXUB(a,b) \ |
| … |
… |
static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){ |
| 135 | 135 | "psubusb %%mm3, %%mm4 \n\t" |
| 136 | 136 | |
| 137 | 137 | " \n\t" |
| 138 | | #if HAVE_MMX2 |
| | 138 | #if HAVE_MMXEXT |
| 139 | 139 | "pxor %%mm7, %%mm7 \n\t" |
| 140 | 140 | "psadbw %%mm7, %%mm0 \n\t" |
| 141 | 141 | #else |
| … |
… |
static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){ |
| 178 | 178 | #if !HAVE_ALTIVEC |
| 179 | 179 | static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) |
| 180 | 180 | { |
| 181 | | #if HAVE_MMX2 || HAVE_AMD3DNOW |
| | 181 | #if HAVE_MMXEXT || HAVE_AMD3DNOW |
| 182 | 182 | src+= stride*3; |
| 183 | 183 | __asm__ volatile( //"movv %0 %1 %2\n\t" |
| 184 | 184 | "movq %2, %%mm0 \n\t" // QP,..., QP |
| … |
… |
static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) |
| 305 | 305 | : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb) |
| 306 | 306 | : "%"REG_a, "%"REG_c |
| 307 | 307 | ); |
| 308 | | #else //HAVE_MMX2 || HAVE_AMD3DNOW |
| | 308 | #else //HAVE_MMXEXT || HAVE_AMD3DNOW |
| 309 | 309 | const int l1= stride; |
| 310 | 310 | const int l2= stride + l1; |
| 311 | 311 | const int l3= stride + l2; |
| … |
… |
static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) |
| 344 | 344 | |
| 345 | 345 | src++; |
| 346 | 346 | } |
| 347 | | #endif //HAVE_MMX2 || HAVE_AMD3DNOW |
| | 347 | #endif //HAVE_MMXEXT || HAVE_AMD3DNOW |
| 348 | 348 | } |
| 349 | 349 | #endif //HAVE_ALTIVEC |
| 350 | 350 | |
| … |
… |
static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) |
| 357 | 357 | */ |
| 358 | 358 | static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) |
| 359 | 359 | { |
| 360 | | #if HAVE_MMX2 || HAVE_AMD3DNOW |
| | 360 | #if HAVE_MMXEXT || HAVE_AMD3DNOW |
| 361 | 361 | src+= stride*3; |
| 362 | 362 | |
| 363 | 363 | __asm__ volatile( |
| … |
… |
static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) |
| 443 | 443 | : "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb) |
| 444 | 444 | : "%"REG_a, "%"REG_c |
| 445 | 445 | ); |
| 446 | | #else //HAVE_MMX2 || HAVE_AMD3DNOW |
| | 446 | #else //HAVE_MMXEXT || HAVE_AMD3DNOW |
| 447 | 447 | |
| 448 | 448 | const int l1= stride; |
| 449 | 449 | const int l2= stride + l1; |
| … |
… |
static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) |
| 477 | 477 | } |
| 478 | 478 | src++; |
| 479 | 479 | } |
| 480 | | #endif //HAVE_MMX2 || HAVE_AMD3DNOW |
| | 480 | #endif //HAVE_MMXEXT || HAVE_AMD3DNOW |
| 481 | 481 | } |
| 482 | 482 | |
| 483 | 483 | #if !HAVE_ALTIVEC |
| 484 | 484 | static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c) |
| 485 | 485 | { |
| 486 | | #if HAVE_MMX2 || HAVE_AMD3DNOW |
| | 486 | #if HAVE_MMXEXT || HAVE_AMD3DNOW |
| 487 | 487 | /* |
| 488 | 488 | uint8_t tmp[16]; |
| 489 | 489 | const int l1= stride; |
| … |
… |
static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext |
| 872 | 872 | "movq (%3), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 |
| 873 | 873 | "movq 8(%3), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 |
| 874 | 874 | |
| 875 | | #if HAVE_MMX2 |
| | 875 | #if HAVE_MMXEXT |
| 876 | 876 | "movq %%mm7, %%mm6 \n\t" // 0 |
| 877 | 877 | "psubw %%mm0, %%mm6 \n\t" |
| 878 | 878 | "pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| |
| … |
… |
static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext |
| 904 | 904 | "psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3| |
| 905 | 905 | #endif |
| 906 | 906 | |
| 907 | | #if HAVE_MMX2 |
| | 907 | #if HAVE_MMXEXT |
| 908 | 908 | "pminsw %%mm2, %%mm0 \n\t" |
| 909 | 909 | "pminsw %%mm3, %%mm1 \n\t" |
| 910 | 910 | #else |
| … |
… |
static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext |
| 968 | 968 | "pand %%mm2, %%mm4 \n\t" |
| 969 | 969 | "pand %%mm3, %%mm5 \n\t" |
| 970 | 970 | |
| 971 | | #if HAVE_MMX2 |
| | 971 | #if HAVE_MMXEXT |
| 972 | 972 | "pminsw %%mm0, %%mm4 \n\t" |
| 973 | 973 | "pminsw %%mm1, %%mm5 \n\t" |
| 974 | 974 | #else |
| … |
… |
static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext |
| 995 | 995 | : "r" ((x86_reg)stride), "m" (c->pQPb), "r"(tmp) |
| 996 | 996 | : "%"REG_a |
| 997 | 997 | ); |
| 998 | | #else //HAVE_MMX2 || HAVE_AMD3DNOW |
| | 998 | #else //HAVE_MMXEXT || HAVE_AMD3DNOW |
| 999 | 999 | const int l1= stride; |
| 1000 | 1000 | const int l2= stride + l1; |
| 1001 | 1001 | const int l3= stride + l2; |
| … |
… |
static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext |
| 1033 | 1033 | } |
| 1034 | 1034 | src++; |
| 1035 | 1035 | } |
| 1036 | | #endif //HAVE_MMX2 || HAVE_AMD3DNOW |
| | 1036 | #endif //HAVE_MMXEXT || HAVE_AMD3DNOW |
| 1037 | 1037 | } |
| 1038 | 1038 | #endif //HAVE_ALTIVEC |
| 1039 | 1039 | |
| 1040 | 1040 | #if !HAVE_ALTIVEC |
| 1041 | 1041 | static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c) |
| 1042 | 1042 | { |
| 1043 | | #if HAVE_MMX2 || HAVE_AMD3DNOW |
| | 1043 | #if HAVE_MMXEXT || HAVE_AMD3DNOW |
| 1044 | 1044 | DECLARE_ALIGNED(8, uint64_t, tmp)[3]; |
| 1045 | 1045 | __asm__ volatile( |
| 1046 | 1046 | "pxor %%mm6, %%mm6 \n\t" |
| … |
… |
static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c) |
| 1060 | 1060 | |
| 1061 | 1061 | #undef REAL_FIND_MIN_MAX |
| 1062 | 1062 | #undef FIND_MIN_MAX |
| 1063 | | #if HAVE_MMX2 |
| | 1063 | #if HAVE_MMXEXT |
| 1064 | 1064 | #define REAL_FIND_MIN_MAX(addr)\ |
| 1065 | 1065 | "movq " #addr ", %%mm0 \n\t"\ |
| 1066 | 1066 | "pminub %%mm0, %%mm7 \n\t"\ |
| … |
… |
FIND_MIN_MAX((%0, %1, 8)) |
| 1087 | 1087 | |
| 1088 | 1088 | "movq %%mm7, %%mm4 \n\t" |
| 1089 | 1089 | "psrlq $8, %%mm7 \n\t" |
| 1090 | | #if HAVE_MMX2 |
| | 1090 | #if HAVE_MMXEXT |
| 1091 | 1091 | "pminub %%mm4, %%mm7 \n\t" // min of pixels |
| 1092 | 1092 | "pshufw $0xF9, %%mm7, %%mm4 \n\t" |
| 1093 | 1093 | "pminub %%mm4, %%mm7 \n\t" // min of pixels |
| … |
… |
FIND_MIN_MAX((%0, %1, 8)) |
| 1112 | 1112 | |
| 1113 | 1113 | "movq %%mm6, %%mm4 \n\t" |
| 1114 | 1114 | "psrlq $8, %%mm6 \n\t" |
| 1115 | | #if HAVE_MMX2 |
| | 1115 | #if HAVE_MMXEXT |
| 1116 | 1116 | "pmaxub %%mm4, %%mm6 \n\t" // max of pixels |
| 1117 | 1117 | "pshufw $0xF9, %%mm6, %%mm4 \n\t" |
| 1118 | 1118 | "pmaxub %%mm4, %%mm6 \n\t" |
| … |
… |
DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1, |
| 1266 | 1266 | : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2), "q"(tmp) |
| 1267 | 1267 | : "%"REG_a, "%"REG_d |
| 1268 | 1268 | ); |
| 1269 | | #else //HAVE_MMX2 || HAVE_AMD3DNOW |
| | 1269 | #else //HAVE_MMXEXT || HAVE_AMD3DNOW |
| 1270 | 1270 | int y; |
| 1271 | 1271 | int min=255; |
| 1272 | 1272 | int max=0; |
| … |
… |
DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1, |
| 1383 | 1383 | // src[0] = src[7]=src[stride*7]=src[stride*7 + 7]=255; |
| 1384 | 1384 | } |
| 1385 | 1385 | #endif |
| 1386 | | #endif //HAVE_MMX2 || HAVE_AMD3DNOW |
| | 1386 | #endif //HAVE_MMXEXT || HAVE_AMD3DNOW |
| 1387 | 1387 | } |
| 1388 | 1388 | #endif //HAVE_ALTIVEC |
| 1389 | 1389 | |
| … |
… |
DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1, |
| 1395 | 1395 | */ |
| 1396 | 1396 | static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride) |
| 1397 | 1397 | { |
| 1398 | | #if HAVE_MMX2 || HAVE_AMD3DNOW |
| | 1398 | #if HAVE_MMXEXT || HAVE_AMD3DNOW |
| 1399 | 1399 | src+= 4*stride; |
| 1400 | 1400 | __asm__ volatile( |
| 1401 | 1401 | "lea (%0, %1), %%"REG_a" \n\t" |
| … |
… |
static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid |
| 1448 | 1448 | */ |
| 1449 | 1449 | static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride) |
| 1450 | 1450 | { |
| 1451 | | #if HAVE_MMX2 || HAVE_AMD3DNOW |
| | 1451 | #if HAVE_MMXEXT || HAVE_AMD3DNOW |
| 1452 | 1452 | src+= stride*3; |
| 1453 | 1453 | __asm__ volatile( |
| 1454 | 1454 | "lea (%0, %1), %%"REG_a" \n\t" |
| … |
… |
DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, |
| 1490 | 1490 | : : "r" (src), "r" ((x86_reg)stride) |
| 1491 | 1491 | : "%"REG_a, "%"REG_d, "%"REG_c |
| 1492 | 1492 | ); |
| 1493 | | #else //HAVE_MMX2 || HAVE_AMD3DNOW |
| | 1493 | #else //HAVE_MMXEXT || HAVE_AMD3DNOW |
| 1494 | 1494 | int x; |
| 1495 | 1495 | src+= stride*3; |
| 1496 | 1496 | for(x=0; x<8; x++){ |
| … |
… |
DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, |
| 1500 | 1500 | src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4); |
| 1501 | 1501 | src++; |
| 1502 | 1502 | } |
| 1503 | | #endif //HAVE_MMX2 || HAVE_AMD3DNOW |
| | 1503 | #endif //HAVE_MMXEXT || HAVE_AMD3DNOW |
| 1504 | 1504 | } |
| 1505 | 1505 | |
| 1506 | 1506 | /** |
| … |
… |
DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, |
| 1512 | 1512 | */ |
| 1513 | 1513 | static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp) |
| 1514 | 1514 | { |
| 1515 | | #if HAVE_MMX2 || HAVE_AMD3DNOW |
| | 1515 | #if HAVE_MMXEXT || HAVE_AMD3DNOW |
| 1516 | 1516 | src+= stride*4; |
| 1517 | 1517 | __asm__ volatile( |
| 1518 | 1518 | "lea (%0, %1), %%"REG_a" \n\t" |
| … |
… |
DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) |
| 1561 | 1561 | : : "r" (src), "r" ((x86_reg)stride), "r"(tmp) |
| 1562 | 1562 | : "%"REG_a, "%"REG_d |
| 1563 | 1563 | ); |
| 1564 | | #else //HAVE_MMX2 || HAVE_AMD3DNOW |
| | 1564 | #else //HAVE_MMXEXT || HAVE_AMD3DNOW |
| 1565 | 1565 | int x; |
| 1566 | 1566 | src+= stride*4; |
| 1567 | 1567 | for(x=0; x<8; x++){ |
| … |
… |
DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) |
| 1579 | 1579 | |
| 1580 | 1580 | src++; |
| 1581 | 1581 | } |
| 1582 | | #endif //HAVE_MMX2 || HAVE_AMD3DNOW |
| | 1582 | #endif //HAVE_MMXEXT || HAVE_AMD3DNOW |
| 1583 | 1583 | } |
| 1584 | 1584 | |
| 1585 | 1585 | /** |
| … |
… |
DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) |
| 1591 | 1591 | */ |
| 1592 | 1592 | static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2) |
| 1593 | 1593 | { |
| 1594 | | #if HAVE_MMX2 || HAVE_AMD3DNOW |
| | 1594 | #if HAVE_MMXEXT || HAVE_AMD3DNOW |
| 1595 | 1595 | src+= stride*4; |
| 1596 | 1596 | __asm__ volatile( |
| 1597 | 1597 | "lea (%0, %1), %%"REG_a" \n\t" |
| … |
… |
DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) |
| 1651 | 1651 | : : "r" (src), "r" ((x86_reg)stride), "r"(tmp), "r"(tmp2) |
| 1652 | 1652 | : "%"REG_a, "%"REG_d |
| 1653 | 1653 | ); |
| 1654 | | #else //HAVE_MMX2 || HAVE_AMD3DNOW |
| | 1654 | #else //HAVE_MMXEXT || HAVE_AMD3DNOW |
| 1655 | 1655 | int x; |
| 1656 | 1656 | src+= stride*4; |
| 1657 | 1657 | for(x=0; x<8; x++){ |
| … |
… |
DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) |
| 1680 | 1680 | |
| 1681 | 1681 | src++; |
| 1682 | 1682 | } |
| 1683 | | #endif //HAVE_MMX2 || HAVE_AMD3DNOW |
| | 1683 | #endif //HAVE_MMXEXT || HAVE_AMD3DNOW |
| 1684 | 1684 | } |
| 1685 | 1685 | |
| 1686 | 1686 | /** |
| … |
… |
DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) |
| 1692 | 1692 | */ |
| 1693 | 1693 | static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp) |
| 1694 | 1694 | { |
| 1695 | | #if HAVE_MMX2 || HAVE_AMD3DNOW |
| | 1695 | #if HAVE_MMXEXT || HAVE_AMD3DNOW |
| 1696 | 1696 | src+= 4*stride; |
| 1697 | 1697 | __asm__ volatile( |
| 1698 | 1698 | "lea (%0, %1), %%"REG_a" \n\t" |
| … |
… |
static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin |
| 1739 | 1739 | : : "r" (src), "r" ((x86_reg)stride), "r" (tmp) |
| 1740 | 1740 | : "%"REG_a, "%"REG_d |
| 1741 | 1741 | ); |
| 1742 | | #else //HAVE_MMX2 || HAVE_AMD3DNOW |
| | 1742 | #else //HAVE_MMXEXT || HAVE_AMD3DNOW |
| 1743 | 1743 | int a, b, c, x; |
| 1744 | 1744 | src+= 4*stride; |
| 1745 | 1745 | |
| … |
… |
static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin |
| 1782 | 1782 | src += 4; |
| 1783 | 1783 | tmp += 4; |
| 1784 | 1784 | } |
| 1785 | | #endif //HAVE_MMX2 || HAVE_AMD3DNOW |
| | 1785 | #endif //HAVE_MMXEXT || HAVE_AMD3DNOW |
| 1786 | 1786 | } |
| 1787 | 1787 | |
| 1788 | 1788 | /** |
| … |
… |
static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride) |
| 1795 | 1795 | { |
| 1796 | 1796 | #if HAVE_MMX |
| 1797 | 1797 | src+= 4*stride; |
| 1798 | | #if HAVE_MMX2 |
| | 1798 | #if HAVE_MMXEXT |
| 1799 | 1799 | __asm__ volatile( |
| 1800 | 1800 | "lea (%0, %1), %%"REG_a" \n\t" |
| 1801 | 1801 | "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" |
| … |
… |
MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8)) |
| 1885 | 1885 | : : "r" (src), "r" ((x86_reg)stride) |
| 1886 | 1886 | : "%"REG_a, "%"REG_d |
| 1887 | 1887 | ); |
| 1888 | | #endif //HAVE_MMX2 |
| | 1888 | #endif //HAVE_MMXEXT |
| 1889 | 1889 | #else //HAVE_MMX |
| 1890 | 1890 | int x, y; |
| 1891 | 1891 | src+= 4*stride; |
| … |
… |
static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, |
| 2087 | 2087 | |
| 2088 | 2088 | #define FAST_L2_DIFF |
| 2089 | 2089 | //#define L1_DIFF //u should change the thresholds too if u try that one |
| 2090 | | #if HAVE_MMX2 || HAVE_AMD3DNOW |
| | 2090 | #if HAVE_MMXEXT || HAVE_AMD3DNOW |
| 2091 | 2091 | __asm__ volatile( |
| 2092 | 2092 | "lea (%2, %2, 2), %%"REG_a" \n\t" // 3*stride |
| 2093 | 2093 | "lea (%2, %2, 4), %%"REG_d" \n\t" // 5*stride |
| … |
… |
L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc)) |
| 2375 | 2375 | :: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast) |
| 2376 | 2376 | : "%"REG_a, "%"REG_d, "%"REG_c, "memory" |
| 2377 | 2377 | ); |
| 2378 | | #else //HAVE_MMX2 || HAVE_AMD3DNOW |
| | 2378 | #else //HAVE_MMXEXT || HAVE_AMD3DNOW |
| 2379 | 2379 | { |
| 2380 | 2380 | int y; |
| 2381 | 2381 | int d=0; |
| … |
… |
Switch between |
| 2458 | 2458 | } |
| 2459 | 2459 | } |
| 2460 | 2460 | } |
| 2461 | | #endif //HAVE_MMX2 || HAVE_AMD3DNOW |
| | 2461 | #endif //HAVE_MMXEXT || HAVE_AMD3DNOW |
| 2462 | 2462 | } |
| 2463 | 2463 | #endif //HAVE_ALTIVEC |
| 2464 | 2464 | |
| … |
… |
static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st |
| 2865 | 2865 | "movq (%4), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 |
| 2866 | 2866 | "movq 8(%4), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 |
| 2867 | 2867 | |
| 2868 | | #if HAVE_MMX2 |
| | 2868 | #if HAVE_MMXEXT |
| 2869 | 2869 | "movq %%mm7, %%mm6 \n\t" // 0 |
| 2870 | 2870 | "psubw %%mm0, %%mm6 \n\t" |
| 2871 | 2871 | "pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| |
| … |
… |
static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st |
| 2897 | 2897 | "psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3| |
| 2898 | 2898 | #endif |
| 2899 | 2899 | |
| 2900 | | #if HAVE_MMX2 |
| | 2900 | #if HAVE_MMXEXT |
| 2901 | 2901 | "pminsw %%mm2, %%mm0 \n\t" |
| 2902 | 2902 | "pminsw %%mm3, %%mm1 \n\t" |
| 2903 | 2903 | #else |
| … |
… |
static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st |
| 2961 | 2961 | "pand %%mm2, %%mm4 \n\t" |
| 2962 | 2962 | "pand %%mm3, %%mm5 \n\t" |
| 2963 | 2963 | |
| 2964 | | #if HAVE_MMX2 |
| | 2964 | #if HAVE_MMXEXT |
| 2965 | 2965 | "pminsw %%mm0, %%mm4 \n\t" |
| 2966 | 2966 | "pminsw %%mm1, %%mm5 \n\t" |
| 2967 | 2967 | #else |
| … |
… |
static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t |
| 3024 | 3024 | "lea (%2,%4), %%"REG_a" \n\t" |
| 3025 | 3025 | "lea (%3,%5), %%"REG_d" \n\t" |
| 3026 | 3026 | "pxor %%mm4, %%mm4 \n\t" |
| 3027 | | #if HAVE_MMX2 |
| | 3027 | #if HAVE_MMXEXT |
| 3028 | 3028 | #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \ |
| 3029 | 3029 | "movq " #src1 ", %%mm0 \n\t"\ |
| 3030 | 3030 | "movq " #src1 ", %%mm5 \n\t"\ |
| … |
… |
static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t |
| 3047 | 3047 | "movq %%mm0, " #dst1 " \n\t"\ |
| 3048 | 3048 | "movq %%mm1, " #dst2 " \n\t"\ |
| 3049 | 3049 | |
| 3050 | | #else //HAVE_MMX2 |
| | 3050 | #else //HAVE_MMXEXT |
| 3051 | 3051 | #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \ |
| 3052 | 3052 | "movq " #src1 ", %%mm0 \n\t"\ |
| 3053 | 3053 | "movq " #src1 ", %%mm5 \n\t"\ |
| … |
… |
static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t |
| 3074 | 3074 | "movq %%mm0, " #dst1 " \n\t"\ |
| 3075 | 3075 | "movq %%mm1, " #dst2 " \n\t"\ |
| 3076 | 3076 | |
| 3077 | | #endif //HAVE_MMX2 |
| | 3077 | #endif //HAVE_MMXEXT |
| 3078 | 3078 | #define SCALED_CPY(src1, src2, dst1, dst2)\ |
| 3079 | 3079 | REAL_SCALED_CPY(src1, src2, dst1, dst2) |
| 3080 | 3080 | |
| … |
… |
static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
| 3248 | 3248 | |
| 3249 | 3249 | scale= (double)(c.ppMode.maxAllowedY - c.ppMode.minAllowedY) / (double)(white-black); |
| 3250 | 3250 | |
| 3251 | | #if HAVE_MMX2 |
| | 3251 | #if HAVE_MMXEXT |
| 3252 | 3252 | c.packedYScale= (uint16_t)(scale*256.0 + 0.5); |
| 3253 | 3253 | c.packedYOffset= (((black*c.packedYScale)>>8) - c.ppMode.minAllowedY) & 0xFFFF; |
| 3254 | 3254 | #else |
| … |
… |
static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
| 3281 | 3281 | // with the L1 Cache of the P4 ... or only a few blocks at a time or something |
| 3282 | 3282 | for(x=0; x<width; x+=BLOCK_SIZE){ |
| 3283 | 3283 | |
| 3284 | | #if HAVE_MMX2 |
| | 3284 | #if HAVE_MMXEXT |
| 3285 | 3285 | /* |
| 3286 | 3286 | prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32); |
| 3287 | 3287 | prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32); |
| … |
… |
static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
| 3417 | 3417 | #endif |
| 3418 | 3418 | |
| 3419 | 3419 | |
| 3420 | | #if HAVE_MMX2 |
| | 3420 | #if HAVE_MMXEXT |
| 3421 | 3421 | /* |
| 3422 | 3422 | prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32); |
| 3423 | 3423 | prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32); |
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index ae79eb6..07443db 100644
|
a
|
b
|
static int swScale(SwsContext *c, const uint8_t *src[], |
| 422 | 422 | ) { |
| 423 | 423 | static int warnedAlready=0; |
| 424 | 424 | int cpu_flags = av_get_cpu_flags(); |
| 425 | | if (HAVE_MMX2 && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){ |
| | 425 | if (HAVE_MMXEXT && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){ |
| 426 | 426 | av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speedloss\n"); |
| 427 | 427 | warnedAlready=1; |
| 428 | 428 | } |
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index 67cda51..b2e06e3 100644
|
a
|
b
|
void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI |
| 204 | 204 | } |
| 205 | 205 | } |
| 206 | 206 | |
| 207 | | #if HAVE_MMX2 |
| | 207 | #if HAVE_MMXEXT |
| 208 | 208 | static void yuv2yuvX_sse3(const int16_t *filter, int filterSize, |
| 209 | 209 | const int16_t **src, uint8_t *dest, int dstW, |
| 210 | 210 | const uint8_t *dither, int offset) |