[FFmpeg-devel] [PATCH] Coremake support - ffmpeg_nommx.patch (1/1)

Sun May 20 20:05:44 CEST 2007

Hi,

Diego Biurrun <diego <at> biurrun.de> writes:
> On Mon, May 14, 2007 at 05:38:44PM -0400, Ronald S. Bultje wrote:
> > 
> > first, sorry for the threading, if you know of a way to fix that in  
> > digest mode, please let me know...
> 
> Not use digest mode?  Try gmane if the volume of ffmpeg-devel drowns
> you.

Let's see how this works. I tried the web interface, which didn't work (too 
much quoted text, no attachments), then tried NNTP with xnntp, which didn't 
work either (it couldn't send emails - or rather, it said it did, but I 
don't think it ever actually did; maybe you'll get this message 10 times 
eventually), so now I'm trying MT_NewsWatcher. Maybe this works. Let me 
know if threading is OK now.

> > --- ffmpeg.orig/libavcodec/dct-test.c 2007-03-22 01:00:46.000000000 -0400
> > +++ ffmpeg/libavcodec/dct-test.c   2007-03-22 01:20:53.000000000 -0400
> > @@ -39,6 +39,7 @@
> >  #define MAX(a, b)  (((a) > (b)) ? (a) : (b))
> >  #endif
> >  
> > +#undef fprintf
> >  #undef printf
> >  
> >  void *fast_memcpy(void *a, const void *b, size_t c){return memcpy(a,b,c);};
> > @@ -46,13 +47,17 @@
> >  /* reference fdct/idct */
> >  extern void fdct(DCTELEM *block);
> >  extern void idct(DCTELEM *block);
> > +#if defined(HAVE_MMX) && defined(CONFIG_GPL)
> >  extern void ff_idct_xvid_mmx(DCTELEM *block);
> >  extern void ff_idct_xvid_mmx2(DCTELEM *block);
> > +#endif
> >  extern void init_fdct();
> >  
> >  extern void j_rev_dct(DCTELEM *data);
> > +#ifdef HAVE_MMX
> >  extern void ff_mmx_idct(DCTELEM *data);
> >  extern void ff_mmxext_idct(DCTELEM *data);
> > +#endif
> >  
> >  extern void odivx_idct_c (short *block);
> >  
> > @@ -83,6 +88,7 @@
> >  
> >  static short idct_mmx_perm[64];
> >  
> > +#ifdef HAVE_MMX
> >  static short idct_simple_mmx_perm[64]={
> >          0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
> >          0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
> > @@ -93,6 +99,7 @@
> >          0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
> >          0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
> >  };
> > +#endif
> >  
> >  void idct_mmx_init(void)
> >  {
> > @@ -162,14 +169,19 @@
> >          for(i=0; i<64; i++)
> >              block_org[i]= block1[i];
> >  
> > +#ifdef HAVE_MMX
> >          if (fdct_func == ff_mmx_idct ||
> >              fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) {
> > +#else
> > + if (fdct_func == j_rev_dct) {
> > +#endif
> >              for(i=0;i<64;i++)
> >                  block[idct_mmx_perm[i]] = block1[i];
> > +#ifdef HAVE_MMX
> >          } else if(fdct_func == ff_simple_idct_mmx ) {
> >              for(i=0;i<64;i++)
> >                  block[idct_simple_mmx_perm[i]] = block1[i];
> > -
> > +#endif
> >          } else {
> >              for(i=0; i<64; i++)
> >                  block[i]= block1[i];
> > @@ -184,7 +196,9 @@
> >  #endif
> >  
> >          fdct_func(block);
> > +#ifdef HAVE_MMX
> >          emms(); /* for ff_mmx_idct */
> > +#endif
> >  
> >          if (fdct_func == fdct_ifast
> >  #ifndef FAAN_POSTSCALE
> > @@ -261,13 +275,19 @@
> >      }break;
> >      }
> >  
> > +#ifdef HAVE_MMX
> >      if (fdct_func == ff_mmx_idct ||
> >          fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) {
> > +#else
> > +    if (fdct_func == j_rev_dct) {
> > +#endif
> >          for(i=0;i<64;i++)
> >              block[idct_mmx_perm[i]] = block1[i];
> > +#ifdef HAVE_MMX
> >      } else if(fdct_func == ff_simple_idct_mmx ) {
> >          for(i=0;i<64;i++)
> >              block[idct_simple_mmx_perm[i]] = block1[i];
> > +#endif
> >      } else {
> >          for(i=0; i<64; i++)
> >              block[i]= block1[i];
> > @@ -286,7 +306,9 @@
> >          it1 += NB_ITS_SPEED;
> >          ti1 = gettime() - ti;
> >      } while (ti1 < 1000000);
> > +#ifdef HAVE_MMX
> >      emms();
> > +#endif
> >  
> >      printf("%s %s: %0.1f kdct/s\n",
> >             is_idct ? "IDCT" : "DCT",
> > @@ -446,7 +468,9 @@
> >          it1 += NB_ITS_SPEED;
> >          ti1 = gettime() - ti;
> >      } while (ti1 < 1000000);
> > +#ifdef HAVE_MMX
> >      emms();
> > +#endif
> >  
> >      printf("%s %s: %0.1f kdct/s\n",
> >             1 ? "IDCT248" : "DCT248",
> > @@ -508,18 +532,26 @@
> >              dct_error("REF-DBL", 0, fdct, fdct, test); /* only to verify code ! */
> >              dct_error("IJG-AAN-INT", 0, fdct_ifast, fdct, test);
> >              dct_error("IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, test);
> > +#ifdef HAVE_MMX
> >              dct_error("MMX", 0, ff_fdct_mmx, fdct, test);
> >              dct_error("MMX2", 0, ff_fdct_mmx2, fdct, test);
> > +#endif
> >              dct_error("FAAN", 0, ff_faandct, fdct, test);
> >          } else {
> >              dct_error("REF-DBL", 1, idct, idct, test);
> >              dct_error("INT", 1, j_rev_dct, idct, test);
> > +#ifdef HAVE_MMX
> >              dct_error("LIBMPEG2-MMX", 1, ff_mmx_idct, idct, test);
> >              dct_error("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, test);
> > +#endif
> >              dct_error("SIMPLE-C", 1, simple_idct, idct, test);
> > +#ifdef HAVE_MMX
> >              dct_error("SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, test);
> > +#ifdef CONFIG_GPL
> >              dct_error("XVID-MMX", 1, ff_idct_xvid_mmx, idct, test);
> >              dct_error("XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, test);
> > +#endif
> > +#endif
> >              //        dct_error("ODIVX-C", 1, odivx_idct_c, idct);
> >              //printf(" test against odivx idct\n");
> >              //        dct_error("REF", 1, idct, odivx_idct_c);
> 
> Probably OK, you should submit this to us.

OK, see below.

> > --- ffmpeg.orig/libavcodec/imgresample.c 2007-03-22 01:00:47.000000000 -0400
> > +++ ffmpeg/libavcodec/imgresample.c   2007-03-22 01:20:53.000000000 -0400
> > @@ -28,8 +28,10 @@
> >  #include "swscale.h"
> >  #include "dsputil.h"
> >  
> > -#ifdef USE_FASTMEMCPY
> > -#include "libvo/fastmemcpy.h"
> > +#ifdef TEST
> > +#undef printf
> > +#undef fprintf
> > +#define av_log(p,l,...) fprintf(stderr,__VA_ARGS__)
> >  #endif
> 
> Removing the #include is pointless, the rest is probably OK.

So, I may remember this wrong, but I think the consensus a while ago 
(there was a "discussion" on this way back) was that this didn't really 
belong in ffmpeg, since it's mplayer-specific (see e.g "libvo/"). Other 
similar mplayer-specific code (e.g. the fourcc hacks in wav/avi) has 
also been removed in ffmpeg. In this case, mplayer could simply use 
-include fastmemcpy.h (similar to how the linux kernel does this kind of 
stuff) or change the sources if they want this. I've left the removal in 
the patch for now; feel free to drop that hunk (a similar line is removed 
in swscale/rgb2rgb.c).

> > --- ffmpeg.orig/libavcodec/i386/h264dsp_mmx.c  2007-03-22 01:00:40.000000000 -0400
> > +++ ffmpeg/libavcodec/i386/h264dsp_mmx.c 2007-03-22 01:20:53.000000000 -0400
> > @@ -386,7 +386,7 @@
> >  
> > -static inline void h264_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0)
> > +static void h264_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0)
> 
> ?
> 
> > --- ffmpeg.orig/libavcodec/i386/snowdsp_mmx.c  2007-03-22 01:00:40.000000000 -0400
> > +++ ffmpeg/libavcodec/i386/snowdsp_mmx.c 2007-03-22 01:20:53.000000000 -0400
> > @@ -459,7 +459,6 @@
> >          b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
> >          b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
> >      }
> > -
> >           asm volatile (
> >          "jmp 2f                                      \n\t"
> >          "1:                                          \n\t"
> > @@ -566,7 +565,6 @@
> >          b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
> >          b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
> >      }
> > -
> >      asm volatile(
> >          "jmp 2f                                      \n\t"
> >          "1:                                          \n\t"
> 
> What for?

The first change (dropping "inline") is a workaround for broken compilers 
(which I had to work with for a while :) ), and the second is left over 
from removing a similar hack for broken compilers (there used to be 
alternate code there :) ), where I unfortunately removed the blank lines 
as well. Both changes have been removed from the patch.

> > Index: ffmpeg/AUTHORS
> > ===================================================================
> > --- /dev/null 1970-01-01 00:00:00.000000000 +0000
> > +++ ffmpeg/AUTHORS  2007-03-22 01:20:53.000000000 -0400
> > Index: ffmpeg/ChangeLog
> > ===================================================================
> > --- /dev/null 1970-01-01 00:00:00.000000000 +0000
> > +++ ffmpeg/ChangeLog   2007-03-22 01:21:56.000000000 -0400
> 
> Why the heck do you duplicate these files?

Auto-crack (those files are required for a default dist, with those 
exact names and case).

> > Index: ffmpeg/libavcodec/liba52/Makefile.am
> > ===================================================================
> > --- /dev/null 1970-01-01 00:00:00.000000000 +0000
> > +++ ffmpeg/libavcodec/liba52/Makefile.am 2007-03-22 01:20:53.000000000 -0400
> 
> This hasn't been updated in a while ...
> 
> > Index: ffmpeg/libavcodec/.cvsignore
> > ===================================================================
> > --- /dev/null 1970-01-01 00:00:00.000000000 +0000
> > +++ ffmpeg/libavcodec/.cvsignore   2007-03-22 01:20:53.000000000 -0400
> 
> Still using CVS?

Both removed (indeed, they were left-overs from the cvs/liba52 days; 
neither was used any more, as you may have guessed from 
libavcodec/Makefile.am). Thanks for helping me find those. :-)

> > Index: ffmpeg/libavcodec/allcodecs.c
> > ===================================================================
> > --- ffmpeg.orig/libavcodec/allcodecs.c   2007-03-22 01:00:46.000000000 -0400
> > +++ ffmpeg/libavcodec/allcodecs.c  2007-03-22 01:20:53.000000000 -0400
> > @@ -96,7 +96,9 @@
> >      REGISTER_ENCDEC (MJPEG, mjpeg);
> >      REGISTER_DECODER(MJPEGB, mjpegb);
> >      REGISTER_DECODER(MMVIDEO, mmvideo);
> > +#ifdef HAVE_XVMC
> >      REGISTER_DECODER(MPEG_XVMC, mpeg_xvmc);
> > +#endif
> 
> We were glad to get rid of these.  It seems our configure does have a
> feature or two more than yours after all ...

Probably. I'll be honest here, I'm not yet very interested in adding 
support for this, since A) the patch will never be accepted upstream 
anyway and B) it's a fair piece of work with no gain at all since I 
don't use it. For those parts (external libs etc.) where I need it, I 
re-added the compile conditionals to make it compile. It's not hard to 
add it, and the patch would probably still be smaller (don't forget that 
the patch w/o compile conditionals was smaller than ffmpeg's system 
without compile conditionals also), I just don't want to spend the time 
unless I feel it'll be useful. Essentially, I'll add it if there's a 
chance that it goes upstream. I don't think that'll happen. 
Alternatively, if I see a reason to use it, I'll add it also (and then 
the above part of the patch will disappear).

Anyway, on to the part of the patch that is relevant for you. Attached (or, 
if that doesn't work, at 
http://people.freedesktop.org/~rbultje/ffmpeg_nommx.patch - still 
testing...) you'll find those parts of the patch that you said I should 
submit separately. The patch does a bunch of things. First of all, the 
gains: it allows most of the tests to be compiled (by default, w/o mmx and 
w/o gpl). The changes that I made:

* most tests don't link to lav[ufc] and thus use printf() rather than 
av_log(). However, for utility macros, they do include avutil.h, and 
thus fail to compile because printf/fprintf are redefined (to 
please_use_av_log) to force the use of av_log(). Thus, most tests need 
an #undef printf/fprintf to compile. Similar for malloc in swscale (last 
part of the patch).
* several tests reference mmx/gpl code w/o checking for whether this is 
enabled. Those parts have been marked with appropriate compile 
conditionals.
* as Mans suggested, emms -> emms_c
* in dsputil.c and dsputil_mmx.c/h264dsp_mmx.c, macros with the same 
names are used. dsptest.c in tests/ includes both of those, and thus the 
compile will give warnings. It's probably a good idea to #undef each of 
them or use similar names. Both already use #undefs internally several 
times for those variables (e.g. C[0-7]), since they're reused in various 
places with different values within the same files. I simply added 
#undefs at the end of where they're used also, so that multiple files 
can use the same macro names. H264_{WEIGHT,MC} same story.
* fastmemcpy buggage, see above, remove if unwanted (I don't care if it 
goes upstream, but I'll leave it in in my copy regardless).
* the APIs used by motion_test.c and dsptest.c have changed in various 
ways; I updated both tests for those API changes. Worksforme[tm].

It's various changes together, but all of it is needed to make the tests 
work, hence one big patch.

Have fun,
Ronald
Index: ffmpeg/libavcodec/dct-test.c
===================================================================

--- ffmpeg.orig/libavcodec/dct-test.c	2007-03-22 01:00:46.000000000 -0400
+++ ffmpeg/libavcodec/dct-test.c	2007-05-20 12:53:45.000000000 -0400
@@ -39,6 +39,7 @@
 #define MAX(a, b)  (((a) > (b)) ? (a) : (b))
 #endif
 
+#undef fprintf
 #undef printf
 
 void *fast_memcpy(void *a, const void *b, size_t c){return memcpy(a,b,c);};
@@ -46,13 +47,17 @@
 /* reference fdct/idct */
 extern void fdct(DCTELEM *block);
 extern void idct(DCTELEM *block);
+#if defined(HAVE_MMX) && defined(CONFIG_GPL)
 extern void ff_idct_xvid_mmx(DCTELEM *block);
 extern void ff_idct_xvid_mmx2(DCTELEM *block);
+#endif
 extern void init_fdct();
 
 extern void j_rev_dct(DCTELEM *data);
+#ifdef HAVE_MMX
 extern void ff_mmx_idct(DCTELEM *data);
 extern void ff_mmxext_idct(DCTELEM *data);
+#endif
 
 extern void odivx_idct_c (short *block);
 
@@ -83,6 +88,7 @@
 
 static short idct_mmx_perm[64];
 
+#ifdef HAVE_MMX
 static short idct_simple_mmx_perm[64]={
         0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
         0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
@@ -93,6 +99,7 @@
         0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
         0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
 };
+#endif
 
 void idct_mmx_init(void)
 {
@@ -162,14 +169,19 @@
         for(i=0; i<64; i++)
             block_org[i]= block1[i];
 
+#ifdef HAVE_MMX
         if (fdct_func == ff_mmx_idct ||
             fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) {
+#else
+	if (fdct_func == j_rev_dct) {
+#endif
             for(i=0;i<64;i++)
                 block[idct_mmx_perm[i]] = block1[i];
+#ifdef HAVE_MMX
         } else if(fdct_func == ff_simple_idct_mmx ) {
             for(i=0;i<64;i++)
                 block[idct_simple_mmx_perm[i]] = block1[i];
-
+#endif
         } else {
             for(i=0; i<64; i++)
                 block[i]= block1[i];
@@ -184,7 +196,7 @@
 #endif
 
         fdct_func(block);
-        emms(); /* for ff_mmx_idct */
+        emms_c(); /* for ff_mmx_idct */
 
         if (fdct_func == fdct_ifast
 #ifndef FAAN_POSTSCALE
@@ -261,13 +273,19 @@
     }break;
     }
 
+#ifdef HAVE_MMX
     if (fdct_func == ff_mmx_idct ||
         fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) {
+#else
+    if (fdct_func == j_rev_dct) {
+#endif
         for(i=0;i<64;i++)
             block[idct_mmx_perm[i]] = block1[i];
+#ifdef HAVE_MMX
     } else if(fdct_func == ff_simple_idct_mmx ) {
         for(i=0;i<64;i++)
             block[idct_simple_mmx_perm[i]] = block1[i];
+#endif
     } else {
         for(i=0; i<64; i++)
             block[i]= block1[i];
@@ -286,7 +304,7 @@
         it1 += NB_ITS_SPEED;
         ti1 = gettime() - ti;
     } while (ti1 < 1000000);
-    emms();
+    emms_c();
 
     printf("%s %s: %0.1f kdct/s\n",
            is_idct ? "IDCT" : "DCT",
@@ -446,7 +464,7 @@
         it1 += NB_ITS_SPEED;
         ti1 = gettime() - ti;
     } while (ti1 < 1000000);
-    emms();
+    emms_c();
 
     printf("%s %s: %0.1f kdct/s\n",
            1 ? "IDCT248" : "DCT248",
@@ -508,18 +526,26 @@
             dct_error("REF-DBL", 0, fdct, fdct, test); /* only to verify code ! */
             dct_error("IJG-AAN-INT", 0, fdct_ifast, fdct, test);
             dct_error("IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, test);
+#ifdef HAVE_MMX
             dct_error("MMX", 0, ff_fdct_mmx, fdct, test);
             dct_error("MMX2", 0, ff_fdct_mmx2, fdct, test);
+#endif
             dct_error("FAAN", 0, ff_faandct, fdct, test);
         } else {
             dct_error("REF-DBL", 1, idct, idct, test);
             dct_error("INT", 1, j_rev_dct, idct, test);
+#ifdef HAVE_MMX
             dct_error("LIBMPEG2-MMX", 1, ff_mmx_idct, idct, test);
             dct_error("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, test);
+#endif
             dct_error("SIMPLE-C", 1, simple_idct, idct, test);
+#ifdef HAVE_MMX
             dct_error("SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, test);
+#ifdef CONFIG_GPL
             dct_error("XVID-MMX", 1, ff_idct_xvid_mmx, idct, test);
             dct_error("XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, test);
+#endif
+#endif
             //        dct_error("ODIVX-C", 1, odivx_idct_c, idct);
             //printf(" test against odivx idct\n");
             //        dct_error("REF", 1, idct, odivx_idct_c);
Index: ffmpeg/libavcodec/fft-test.c
===================================================================
--- ffmpeg.orig/libavcodec/fft-test.c	2007-03-22 01:00:48.000000000 -0400
+++ ffmpeg/libavcodec/fft-test.c	2007-03-22 01:20:53.000000000 -0400
@@ -28,6 +28,9 @@
 #include <unistd.h>
 #include <sys/time.h>
 
+#undef fprintf
+#undef printf
+
 int mm_flags;
 
 /* reference fft */
Index: ffmpeg/libavcodec/i386/cputest.c
===================================================================
--- ffmpeg.orig/libavcodec/i386/cputest.c	2007-03-22 01:00:40.000000000 -0400
+++ ffmpeg/libavcodec/i386/cputest.c	2007-03-22 01:20:53.000000000 -0400
@@ -120,6 +120,7 @@
 }
 
 #ifdef __TEST__
+#undef printf
 int main ( void )
 {
   int mm_flags;
Index: ffmpeg/libavcodec/i386/dsputil_mmx.c
===================================================================
--- ffmpeg.orig/libavcodec/i386/dsputil_mmx.c	2007-03-22 01:00:40.000000000 -0400
+++ ffmpeg/libavcodec/i386/dsputil_mmx.c	2007-05-20 12:11:02.000000000 -0400
@@ -3457,6 +3457,7 @@
             dspfunc(put_2tap_qpel, 1, 8);
             dspfunc(avg_2tap_qpel, 0, 16);
             dspfunc(avg_2tap_qpel, 1, 8);
+#undef dspfunc
 
             c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_3dnow;
             c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow;
Index: ffmpeg/libavcodec/i386/fdct_mmx.c
===================================================================
--- ffmpeg.orig/libavcodec/i386/fdct_mmx.c	2007-03-22 01:00:40.000000000 -0400
+++ ffmpeg/libavcodec/i386/fdct_mmx.c	2007-05-20 12:02:36.000000000 -0400
@@ -281,6 +281,13 @@
 #define C6 12299
 #define C7 6270
 TABLE_SSE2
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
 }};
 
 
Index: ffmpeg/libavcodec/i386/simple_idct_mmx.c
===================================================================
--- ffmpeg.orig/libavcodec/i386/simple_idct_mmx.c	2007-03-22 01:00:40.000000000 -0400
+++ ffmpeg/libavcodec/i386/simple_idct_mmx.c	2007-05-20 12:09:00.000000000 -0400
@@ -77,6 +77,14 @@
 
  C7, -C5,  C7, -C5,
  C3, -C1,  C3, -C1
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
 };
 
 #if 0
@@ -87,14 +95,6 @@
 
 static void inline idctCol (int16_t * col, int16_t *input)
 {
-#undef C0
-#undef C1
-#undef C2
-#undef C3
-#undef C4
-#undef C5
-#undef C6
-#undef C7
         int a0, a1, a2, a3, b0, b1, b2, b3;
         const int C0 = 23170; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
         const int C1 = 22725; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
Index: ffmpeg/libavcodec/imgresample.c
===================================================================
--- ffmpeg.orig/libavcodec/imgresample.c	2007-03-22 01:00:47.000000000 -0400
+++ ffmpeg/libavcodec/imgresample.c	2007-03-22 01:20:53.000000000 -0400
@@ -28,8 +28,10 @@
 #include "swscale.h"
 #include "dsputil.h"
 
-#ifdef USE_FASTMEMCPY
-#include "libvo/fastmemcpy.h"
+#ifdef TEST
+#undef printf
+#undef fprintf
+#define av_log(p,l,...) fprintf(stderr,__VA_ARGS__)
 #endif
 
 #define NB_COMPONENTS 3
@@ -835,7 +837,9 @@
     fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
     fwrite(img,1, xsize * ysize,f);
     fclose(f);
+#ifndef TEST
 #define fprintf please_use_av_log
+#endif
 }
 
 static void dump_filter(int16_t *filter)
Index: ffmpeg/libavcodec/motion_test.c
===================================================================
--- ffmpeg.orig/libavcodec/motion_test.c	2007-03-22 01:00:50.000000000 -0400
+++ ffmpeg/libavcodec/motion_test.c	2007-05-20 12:55:02.000000000 -0400
@@ -31,24 +31,27 @@
 
 #include "dsputil.h"
 
+#ifdef HAVE_MMX
 #include "i386/mmx.h"
+#endif
 
 #undef printf
+#undef fprintf
 
-int pix_abs16x16_mmx(uint8_t *blk1, uint8_t *blk2, int lx);
-int pix_abs16x16_mmx1(uint8_t *blk1, uint8_t *blk2, int lx);
-int pix_abs16x16_c(uint8_t *blk1, uint8_t *blk2, int lx);
-int pix_abs16x16_x2_mmx(uint8_t *blk1, uint8_t *blk2, int lx);
-int pix_abs16x16_x2_mmx1(uint8_t *blk1, uint8_t *blk2, int lx);
-int pix_abs16x16_x2_c(uint8_t *blk1, uint8_t *blk2, int lx);
-int pix_abs16x16_y2_mmx(uint8_t *blk1, uint8_t *blk2, int lx);
-int pix_abs16x16_y2_mmx1(uint8_t *blk1, uint8_t *blk2, int lx);
-int pix_abs16x16_y2_c(uint8_t *blk1, uint8_t *blk2, int lx);
-int pix_abs16x16_xy2_mmx(uint8_t *blk1, uint8_t *blk2, int lx);
-int pix_abs16x16_xy2_mmx1(uint8_t *blk1, uint8_t *blk2, int lx);
-int pix_abs16x16_xy2_c(uint8_t *blk1, uint8_t *blk2, int lx);
+int pix_abs16x16_mmx(void *v, uint8_t *blk1, uint8_t *blk2, int lx, int h);
+int pix_abs16x16_mmx1(void *v, uint8_t *blk1, uint8_t *blk2, int lx, int h);
+int pix_abs16x16_c(void *v, uint8_t *blk1, uint8_t *blk2, int lx, int h);
+int pix_abs16x16_x2_mmx(void *v, uint8_t *blk1, uint8_t *blk2, int lx, int h);
+int pix_abs16x16_x2_mmx1(void *v, uint8_t *blk1, uint8_t *blk2, int lx, int h);
+int pix_abs16x16_x2_c(void *v, uint8_t *blk1, uint8_t *blk2, int lx, int h);
+int pix_abs16x16_y2_mmx(void *v, uint8_t *blk1, uint8_t *blk2, int lx, int h);
+int pix_abs16x16_y2_mmx1(void *v, uint8_t *blk1, uint8_t *blk2, int lx, int h);
+int pix_abs16x16_y2_c(void *v, uint8_t *blk1, uint8_t *blk2, int lx, int h);
+int pix_abs16x16_xy2_mmx(void *v, uint8_t *blk1, uint8_t *blk2, int lx, int h);
+int pix_abs16x16_xy2_mmx1(void *v, uint8_t *blk1, uint8_t *blk2, int lx, int h);
+int pix_abs16x16_xy2_c(void *v, uint8_t *blk1, uint8_t *blk2, int lx, int h);
 
-typedef int motion_func(uint8_t *blk1, uint8_t *blk2, int lx);
+typedef int motion_func(void *v, uint8_t *blk1, uint8_t *blk2, int lx, int h);
 
 #define WIDTH 64
 #define HEIGHT 64
@@ -103,15 +106,15 @@
         for(y=0;y<HEIGHT-17;y++) {
             for(x=0;x<WIDTH-17;x++) {
                 ptr = img2 + y * WIDTH + x;
-                d1 = test_func(img1, ptr, WIDTH);
-                d2 = ref_func(img1, ptr, WIDTH);
+                d1 = test_func(NULL, img1, ptr, WIDTH, 1);
+                d2 = ref_func(NULL, img1, ptr, WIDTH, 1);
                 if (d1 != d2) {
                     printf("error: mmx=%d c=%d\n", d1, d2);
                 }
             }
         }
     }
-    emms();
+    emms_c();
 
     /* speed test */
     ti = gettime();
@@ -120,11 +123,11 @@
         for(y=0;y<HEIGHT-17;y++) {
             for(x=0;x<WIDTH-17;x++) {
                 ptr = img2 + y * WIDTH + x;
-                d1 += test_func(img1, ptr, WIDTH);
+                d1 += test_func(NULL, img1, ptr, WIDTH, 1);
             }
         }
     }
-    emms();
+    emms_c();
     dummy = d1; /* avoid optimisation */
     ti = gettime() - ti;
 
@@ -136,13 +139,17 @@
 
 int main(int argc, char **argv)
 {
-    int c;
+    int n;
+#ifdef HAVE_MMX
+    AVCodecContext *ctx;
+    DSPContext c, mmx;
+#endif
 
     for(;;) {
-        c = getopt(argc, argv, "h");
-        if (c == -1)
+        n = getopt(argc, argv, "h");
+        if (n == -1)
             break;
-        switch(c) {
+        switch(n) {
         case 'h':
             help();
             break;
@@ -151,9 +158,25 @@
 
     printf("ffmpeg motion test\n");
 
-    test_motion("mmx", pix_abs16x16_mmx, pix_abs16x16_c);
-    test_motion("mmx_x2", pix_abs16x16_x2_mmx, pix_abs16x16_x2_c);
-    test_motion("mmx_y2", pix_abs16x16_y2_mmx, pix_abs16x16_y2_c);
-    test_motion("mmx_xy2", pix_abs16x16_xy2_mmx, pix_abs16x16_xy2_c);
+#ifdef HAVE_MMX
+    ctx = avcodec_alloc_context();
+    mm_flags = 0;
+    dsputil_init(&c, ctx);
+    mm_flags = MM_MMX;
+    dsputil_init(&mmx, ctx);
+
+    test_motion("mmx/16",     mmx.pix_abs[0][0], c.pix_abs[0][0]);
+    test_motion("mmx_x2/16",  mmx.pix_abs[0][1], c.pix_abs[0][1]);
+    test_motion("mmx_y2/16",  mmx.pix_abs[0][2], c.pix_abs[0][2]);
+    test_motion("mmx_xy2/16", mmx.pix_abs[0][3], c.pix_abs[0][3]);
+
+    test_motion("mmx/8",      mmx.pix_abs[1][0], c.pix_abs[1][0]);
+    test_motion("mmx_x2/8",   mmx.pix_abs[1][1], c.pix_abs[1][1]);
+    test_motion("mmx_y2/8",   mmx.pix_abs[1][2], c.pix_abs[1][2]);
+    test_motion("mmx_xy2/8",  mmx.pix_abs[1][3], c.pix_abs[1][3]);
+
+    av_free(ctx);
+#endif
+
     return 0;
 }
Index: ffmpeg/libavformat/grab.c
===================================================================
--- ffmpeg.orig/libavformat/grab.c	2007-03-22 01:00:54.000000000 -0400
+++ ffmpeg/libavformat/grab.c	2007-05-20 12:55:11.000000000 -0400
@@ -846,9 +846,7 @@
                 DEINT_LINE_LUM(12)
                 }
     }
-#ifdef HAVE_MMX
-    emms();
-#endif
+    emms_c();
     return s->frame_size;
 }
 
Index: ffmpeg/tests/dsptest.c
===================================================================
--- ffmpeg.orig/tests/dsptest.c	2007-03-22 01:00:52.000000000 -0400
+++ ffmpeg/tests/dsptest.c	2007-05-20 12:50:47.000000000 -0400
@@ -20,18 +20,18 @@
 #include "avcodec.h"
 #include "dsputil.h"
 #include "mpegvideo.h"
-#include "mpeg12data.h"
-#include "mpeg4data.h"
-#include "../libavcodec/i386/cputest.c"
-#include "../libavcodec/i386/dsputil_mmx.c"
-
-#include "../libavcodec/i386/fdct_mmx.c"
-#include "../libavcodec/i386/idct_mmx.c"
-#include "../libavcodec/i386/motion_est_mmx.c"
-#include "../libavcodec/i386/simple_idct_mmx.c"
-#include "../libavcodec/dsputil.c"
-#include "../libavcodec/simple_idct.c"
-#include "../libavcodec/jfdctfst.c"
+//#include "mpeg12data.h"
+//#include "mpeg4data.h"
+#include "i386/cputest.c"
+#include "i386/dsputil_mmx.c"
+
+#include "i386/fdct_mmx.c"
+#include "i386/idct_mmx.c"
+#include "i386/motion_est_mmx.c"
+#include "i386/simple_idct_mmx.c"
+#include "dsputil.c"
+#include "simple_idct.c"
+#include "jfdctfst.c"
 
 #undef TESTCPU_MAIN
 
@@ -73,27 +73,27 @@
     int mm_flags;
 } pix_func[] = {
 
-    PIX_FUNC_MMX(put_pixels),
+    PIX_FUNC_MMX(put_pixels8),
     //PIX_FUNC_MMX(get_pixels),
     //PIX_FUNC_MMX(put_pixels_clamped),
 #if 1
-    PIX_FUNC(put_pixels_x2),
-    PIX_FUNC(put_pixels_y2),
-    PIX_FUNC_MMX(put_pixels_xy2),
-
-    PIX_FUNC(put_no_rnd_pixels_x2),
-    PIX_FUNC(put_no_rnd_pixels_y2),
-    PIX_FUNC_MMX(put_no_rnd_pixels_xy2),
-
-    PIX_FUNC(avg_pixels),
-    PIX_FUNC(avg_pixels_x2),
-    PIX_FUNC(avg_pixels_y2),
-    PIX_FUNC(avg_pixels_xy2),
-
-    PIX_FUNC_MMX(avg_no_rnd_pixels),
-    PIX_FUNC_MMX(avg_no_rnd_pixels_x2),
-    PIX_FUNC_MMX(avg_no_rnd_pixels_y2),
-    PIX_FUNC_MMX(avg_no_rnd_pixels_xy2),
+    PIX_FUNC(put_pixels8_x2),
+    PIX_FUNC(put_pixels8_y2),
+    PIX_FUNC_MMX(put_pixels8_xy2),
+
+    PIX_FUNC(put_no_rnd_pixels8_x2),
+    PIX_FUNC(put_no_rnd_pixels8_y2),
+    PIX_FUNC_MMX(put_no_rnd_pixels8_xy2),
+
+    PIX_FUNC(avg_pixels8),
+    PIX_FUNC(avg_pixels8_x2),
+    PIX_FUNC(avg_pixels8_y2),
+    PIX_FUNC(avg_pixels8_xy2),
+
+    PIX_FUNC_MMX(avg_no_rnd_pixels8),
+    PIX_FUNC_MMX(avg_no_rnd_pixels8_x2),
+    PIX_FUNC_MMX(avg_no_rnd_pixels8_y2),
+    PIX_FUNC_MMX(avg_no_rnd_pixels8_xy2),
 #endif
     { 0, 0 }
 };
Index: ffmpeg/libavcodec/i386/h264dsp_mmx.c
===================================================================
--- ffmpeg.orig/libavcodec/i386/h264dsp_mmx.c	2007-03-22 01:00:40.000000000 -0400
+++ ffmpeg/libavcodec/i386/h264dsp_mmx.c	2007-05-20 12:13:28.000000000 -0400
@@ -1374,7 +1374,7 @@
 H264_MC(avg_, 4, mmx2)
 H264_MC(avg_, 8, mmx2)
 H264_MC(avg_, 16,mmx2)
-
+#undef H264_MC
 
 #define H264_CHROMA_OP(S,D)
 #define H264_CHROMA_OP4(S,D,T)
@@ -1514,4 +1514,4 @@
 H264_WEIGHT( 4, 8)
 H264_WEIGHT( 4, 4)
 H264_WEIGHT( 4, 2)
-
+#undef H264_WEIGHT
Index: ffmpeg/libswscale/rgb2rgb.c
===================================================================
--- ffmpeg.orig/libswscale/rgb2rgb.c	2007-03-22 01:00:59.000000000 -0400
+++ ffmpeg/libswscale/rgb2rgb.c	2007-03-22 01:20:53.000000000 -0400
@@ -33,9 +33,6 @@
 #include "swscale_internal.h"
 #include "x86_cpu.h"
 #include "bswap.h"
-#ifdef USE_FASTMEMCPY
-#include "libvo/fastmemcpy.h"
-#endif
 
 #define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit
 
Index: ffmpeg/libswscale/cs_test.c
===================================================================
--- ffmpeg.orig/libswscale/cs_test.c	2007-03-22 01:00:59.000000000 -0400
+++ ffmpeg/libswscale/cs_test.c	2007-03-22 09:03:57.000000000 -0400
@@ -23,7 +23,9 @@
 #include <unistd.h>
 #include <stdlib.h>
 #include <inttypes.h>
+#ifdef HAVE_MALLOC_H
 #include <malloc.h>
+#endif
 
 #include "swscale.h"
 #include "rgb2rgb.h"
@@ -34,6 +36,7 @@
 
 #ifdef __APPLE_CC__
 #define memalign(x,y) malloc(y)
+#undef malloc
 #endif
 
 static int cpu_caps;