[FFmpeg-cvslog] r10640 - in trunk/libavcodec: Makefile dsputil.h imgresample.c ppc/check_altivec.c ppc/dsputil_altivec.c ppc/dsputil_altivec.h ppc/fft_altivec.c ppc/gmc_altivec.c ppc/h264_altivec.c ppc/imgresample_altivec.c ppc/imgresample_altivec.h ppc/mpegvideo_altivec.c ppc/util_altivec.h ppc/vc1dsp_altivec.c

lu_zero subversion
Tue Oct 2 13:39:33 CEST 2007


Author: lu_zero
Date: Tue Oct  2 13:39:32 2007
New Revision: 10640

Log:
Sanitize the AltiVec code so that it can be built properly with a runtime AltiVec check
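
The runtime check referred to here is the usual PowerPC idiom: probe once for
a vector unit, from an object compiled without -maltivec, and only then call
into the AltiVec-compiled objects. A minimal sketch of the SIGILL-based probe
(probe_altivec is an illustrative name, not the committed API; the new
check_altivec.c below is the authoritative version, and it also supports
sysctl-based detection on Mac OS X):

    #include <signal.h>
    #include <setjmp.h>

    static sigjmp_buf jmpbuf;

    static void sigill_handler(int sig)
    {
        siglongjmp(jmpbuf, 1);           /* the probe insn trapped: no AltiVec */
    }

    /* Must live in an object built WITHOUT -maltivec, or the compiler could
     * emit vector code before the probe has run. Assumes the assembler
     * accepts AltiVec mnemonics. */
    int probe_altivec(void)
    {
        volatile int result = 0;
        struct sigaction sa, saved;

        sa.sa_handler = sigill_handler;
        sigemptyset(&sa.sa_mask);
        sa.sa_flags = 0;
        sigaction(SIGILL, &sa, &saved);

        if (sigsetjmp(jmpbuf, 1) == 0) {
            /* "vor 0,0,0" is a harmless AltiVec no-op; it raises SIGILL
             * on CPUs without a vector unit. */
            __asm__ __volatile__ ("vor 0, 0, 0");
            result = 1;
        }
        sigaction(SIGILL, &saved, NULL); /* restore the previous handler */
        return result;
    }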

Added:
   trunk/libavcodec/ppc/check_altivec.c
      - copied, changed from r10492, /trunk/libavcodec/ppc/dsputil_altivec.c
   trunk/libavcodec/ppc/imgresample_altivec.c
      - copied, changed from r10492, /trunk/libavcodec/imgresample.c
   trunk/libavcodec/ppc/imgresample_altivec.h
   trunk/libavcodec/ppc/util_altivec.h
      - copied, changed from r10492, /trunk/libavcodec/ppc/dsputil_altivec.h
Modified:
   trunk/libavcodec/Makefile
   trunk/libavcodec/dsputil.h
   trunk/libavcodec/imgresample.c
   trunk/libavcodec/ppc/dsputil_altivec.c
   trunk/libavcodec/ppc/dsputil_altivec.h
   trunk/libavcodec/ppc/fft_altivec.c
   trunk/libavcodec/ppc/gmc_altivec.c
   trunk/libavcodec/ppc/h264_altivec.c
   trunk/libavcodec/ppc/mpegvideo_altivec.c
   trunk/libavcodec/ppc/vc1dsp_altivec.c

Modified: trunk/libavcodec/Makefile
==============================================================================
--- trunk/libavcodec/Makefile	(original)
+++ trunk/libavcodec/Makefile	Tue Oct  2 13:39:32 2007
@@ -403,7 +403,7 @@ OBJS-$(ARCH_SH4)                       +
                                           sh4/dsputil_align.o \
                                           sh4/dsputil_sh4.o   \
 
-OBJS-$(HAVE_ALTIVEC)                   += ppc/dsputil_altivec.o      \
+ALTIVEC-OBJS-yes                       += ppc/dsputil_altivec.o      \
                                           ppc/fdct_altivec.o         \
                                           ppc/fft_altivec.o          \
                                           ppc/float_altivec.o        \
@@ -413,12 +413,17 @@ OBJS-$(HAVE_ALTIVEC)                   +
                                           ppc/mpegvideo_altivec.o    \
                                           ppc/mpegvideo_ppc.o        \
 
-ifeq ($(HAVE_ALTIVEC),yes)
-OBJS-$(CONFIG_H264_DECODER)            += ppc/h264_altivec.o
-OBJS-$(CONFIG_SNOW_DECODER)            += ppc/snow_altivec.o
-OBJS-$(CONFIG_VC1_DECODER)             += ppc/vc1dsp_altivec.o
-OBJS-$(CONFIG_WMV3_DECODER)            += ppc/vc1dsp_altivec.o
-endif
+ALTIVEC-OBJS-$(CONFIG_H264_DECODER)    += ppc/h264_altivec.o
+ALTIVEC-OBJS-$(CONFIG_SNOW_DECODER)    += ppc/snow_altivec.o
+ALTIVEC-OBJS-$(CONFIG_VC1_DECODER)     += ppc/vc1dsp_altivec.o
+ALTIVEC-OBJS-$(CONFIG_WMV3_DECODER)    += ppc/vc1dsp_altivec.o
+
+# -maltivec is needed in order to build AltiVec code.
+$(ALTIVEC-OBJS-yes): CFLAGS += -maltivec -mabi=altivec
+
+# check_altivec must be built without -maltivec so it can run on non-AltiVec CPUs
+OBJS-$(HAVE_ALTIVEC)                   += $(ALTIVEC-OBJS-yes)       \
+                                          ppc/check_altivec.o
 
 OBJS-$(ARCH_BFIN)                      += bfin/dsputil_bfin.o \
                                           bfin/mpegvideo_bfin.o \

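The point of the new ALTIVEC-OBJS list above is the target-specific CFLAGS
line: only the genuinely vectorized objects are compiled with -maltivec,
while ppc/check_altivec.o stays plain so the probe itself cannot fault on a
CPU without a vector unit. What this buys at runtime is the usual
function-pointer dispatch; a simplified, self-contained sketch of the
pattern (all names here are illustrative stand-ins for the dsputil
equivalents, and the "AltiVec" kernel is a placeholder):

    #include <stdint.h>
    #include <stdio.h>

    typedef int (*sad_fn)(const uint8_t *a, const uint8_t *b, int stride, int h);

    /* Plain C kernel: the safe default on any CPU. */
    static int sad16_c(const uint8_t *a, const uint8_t *b, int stride, int h)
    {
        int sum = 0;
        for (int y = 0; y < h; y++, a += stride, b += stride)
            for (int x = 0; x < 16; x++)
                sum += a[x] > b[x] ? a[x] - b[x] : b[x] - a[x];
        return sum;
    }

    /* Stand-in for the AltiVec kernel; the real one lives in an object
     * built with -maltivec. */
    static int sad16_altivec(const uint8_t *a, const uint8_t *b, int stride, int h)
    {
        return sad16_c(a, b, stride, h);   /* placeholder body */
    }

    /* Stand-in for has_altivec() from the new check_altivec.c. */
    static int has_altivec(void) { return 0; }

    int main(void)
    {
        /* Bind the function pointer once, after the runtime probe. */
        sad_fn sad16 = has_altivec() ? sad16_altivec : sad16_c;
        uint8_t a[16 * 4] = {0}, b[16 * 4] = {10};
        printf("sad = %d\n", sad16(a, b, 16, 4));
        return 0;
    }
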
Modified: trunk/libavcodec/dsputil.h
==============================================================================
--- trunk/libavcodec/dsputil.h	(original)
+++ trunk/libavcodec/dsputil.h	Tue Oct  2 13:39:32 2007
@@ -557,12 +557,6 @@ extern int mm_flags;
 
 extern int mm_flags;
 
-#if defined(HAVE_ALTIVEC) && !defined(__APPLE_CC__)
-#define pixel altivec_pixel
-#include <altivec.h>
-#undef pixel
-#endif
-
 #define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(16, t, v)
 #define STRIDE_ALIGN 16
 

Modified: trunk/libavcodec/imgresample.c
==============================================================================
--- trunk/libavcodec/imgresample.c	(original)
+++ trunk/libavcodec/imgresample.c	Tue Oct  2 13:39:32 2007
@@ -28,6 +28,10 @@
 #include "swscale.h"
 #include "dsputil.h"
 
+#ifdef HAVE_ALTIVEC
+#include "ppc/imgresample_altivec.h"
+#endif
+
 #define NB_COMPONENTS 3
 
 #define PHASE_BITS 4
@@ -281,133 +285,6 @@ static void v_resample4_mmx(uint8_t *dst
 }
 #endif /* HAVE_MMX */
 
-#ifdef HAVE_ALTIVEC
-typedef         union {
-    vector unsigned char v;
-    unsigned char c[16];
-} vec_uc_t;
-
-typedef         union {
-    vector signed short v;
-    signed short s[8];
-} vec_ss_t;
-
-void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
-                          int wrap, int16_t *filter)
-{
-    int sum, i;
-    const uint8_t *s;
-    vector unsigned char *tv, tmp, dstv, zero;
-    vec_ss_t srchv[4], srclv[4], fv[4];
-    vector signed short zeros, sumhv, sumlv;
-    s = src;
-
-    for(i=0;i<4;i++)
-    {
-        /*
-           The vec_madds later on does an implicit >>15 on the result.
-           Since FILTER_BITS is 8, and we have 15 bits of magnitude in
-           a signed short, we have just enough bits to pre-shift our
-           filter constants <<7 to compensate for vec_madds.
-        */
-        fv[i].s[0] = filter[i] << (15-FILTER_BITS);
-        fv[i].v = vec_splat(fv[i].v, 0);
-    }
-
-    zero = vec_splat_u8(0);
-    zeros = vec_splat_s16(0);
-
-
-    /*
-       When we're resampling, we'd ideally like both our input and
-       output buffers to be 16-byte aligned, so we can do both aligned
-       reads and writes. Sadly we can't always have this at the moment, so
-       we opt for aligned writes, as unaligned writes have a huge overhead.
-       To do this, do enough scalar resamples to get dst 16-byte aligned.
-    */
-    i = (-(int)dst) & 0xf;
-    while(i>0) {
-        sum = s[0 * wrap] * filter[0] +
-        s[1 * wrap] * filter[1] +
-        s[2 * wrap] * filter[2] +
-        s[3 * wrap] * filter[3];
-        sum = sum >> FILTER_BITS;
-        if (sum<0) sum = 0; else if (sum>255) sum=255;
-        dst[0] = sum;
-        dst++;
-        s++;
-        dst_width--;
-        i--;
-    }
-
-    /* Do our altivec resampling on 16 pixels at once. */
-    while(dst_width>=16) {
-        /*
-           Read 16 (potentially unaligned) bytes from each of
-           4 lines into 4 vectors, and split them into shorts.
-           Interleave the multiply/accumulate for the resample
-           filter with the loads to hide the 3-cycle latency
-           of vec_madds.
-        */
-        tv = (vector unsigned char *) &s[0 * wrap];
-        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
-        srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
-        srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
-        sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
-        sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);
-
-        tv = (vector unsigned char *) &s[1 * wrap];
-        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
-        srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
-        srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
-        sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
-        sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);
-
-        tv = (vector unsigned char *) &s[2 * wrap];
-        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
-        srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
-        srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
-        sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
-        sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);
-
-        tv = (vector unsigned char *) &s[3 * wrap];
-        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
-        srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
-        srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
-        sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
-        sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);
-
-        /*
-           Pack the results into our destination vector,
-           and do an aligned write of that back to memory.
-        */
-        dstv = vec_packsu(sumhv, sumlv) ;
-        vec_st(dstv, 0, (vector unsigned char *) dst);
-
-        dst+=16;
-        s+=16;
-        dst_width-=16;
-    }
-
-    /*
-       If there are any leftover pixels, resample them
-       with the slow scalar method.
-    */
-    while(dst_width>0) {
-        sum = s[0 * wrap] * filter[0] +
-        s[1 * wrap] * filter[1] +
-        s[2 * wrap] * filter[2] +
-        s[3 * wrap] * filter[3];
-        sum = sum >> FILTER_BITS;
-        if (sum<0) sum = 0; else if (sum>255) sum=255;
-        dst[0] = sum;
-        dst++;
-        s++;
-        dst_width--;
-    }
-}
-#endif /* HAVE_ALTIVEC */
-
 /* slow version to handle limit cases. Does not need optimisation */
 static void h_resample_slow(uint8_t *dst, int dst_width,
                             const uint8_t *src, int src_width,

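The pre-shift documented in the removed v_resample16_altivec above deserves a
worked check: vec_madds(a, b, c) computes ((a*b) >> 15) + c with saturation,
so shifting each 8-bit filter coefficient left by 15 - FILTER_BITS = 7 makes
the per-lane result match the scalar (pix * f) >> FILTER_BITS exactly. A
scalar model of the identity (hypothetical values, no AltiVec required):

    #include <assert.h>
    #include <stdint.h>

    #define FILTER_BITS 8

    int main(void)
    {
        int16_t pix = 200;                       /* pixel widened to a short     */
        int16_t f   = 93;                        /* 8-bit filter coefficient     */
        int16_t fv  = f << (15 - FILTER_BITS);   /* pre-shifted, as fv[i].s[0]   */

        int scalar = (pix * f)  >> FILTER_BITS;  /* what the scalar tails do     */
        int vector = (pix * fv) >> 15;           /* what vec_madds does per lane */

        assert(scalar == vector);                /* both are 72 */
        return 0;
    }
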
Copied: trunk/libavcodec/ppc/check_altivec.c (from r10492, /trunk/libavcodec/ppc/dsputil_altivec.c)
==============================================================================
--- /trunk/libavcodec/ppc/dsputil_altivec.c	(original)
+++ trunk/libavcodec/ppc/check_altivec.c	Tue Oct  2 13:39:32 2007
@@ -1,8 +1,4 @@
 /*
- * Copyright (c) 2002 Brian Foley
- * Copyright (c) 2002 Dieter Shirley
- * Copyright (c) 2003-2004 Romain Dolbeau <romain at dolbeau.org>
- *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -20,11 +16,11 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "dsputil.h"
-
-#include "gcc_fixes.h"
 
-#include "dsputil_altivec.h"
+/**
+ * @file check_altivec.c
+ * Checks for AltiVec presence.
+ */
 
 #ifdef __APPLE__
 #include <sys/sysctl.h>
@@ -51,1371 +47,10 @@ static void sigill_handler (int sig)
 }
 #endif /* __APPLE__ */
 
-int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
-{
-    int i;
-    DECLARE_ALIGNED_16(int, s);
-    const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0);
-    vector unsigned char *tv;
-    vector unsigned char pix1v, pix2v, pix2iv, avgv, t5;
-    vector unsigned int sad;
-    vector signed int sumdiffs;
-
-    s = 0;
-    sad = (vector unsigned int)vec_splat_u32(0);
-    for(i=0;i<h;i++) {
-        /*
-           Read unaligned pixels into our vectors. The vectors are as follows:
-           pix1v: pix1[0]-pix1[15]
-           pix2v: pix2[0]-pix2[15]      pix2iv: pix2[1]-pix2[16]
-        */
-        tv = (vector unsigned char *) pix1;
-        pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));
-
-        tv = (vector unsigned char *) &pix2[0];
-        pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[0]));
-
-        tv = (vector unsigned char *) &pix2[1];
-        pix2iv = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[1]));
-
-        /* Calculate the average vector */
-        avgv = vec_avg(pix2v, pix2iv);
-
-        /* Calculate a sum of abs differences vector */
-        t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));
-
-        /* Add each 4 pixel group together and put 4 results into sad */
-        sad = vec_sum4s(t5, sad);
-
-        pix1 += line_size;
-        pix2 += line_size;
-    }
-    /* Sum up the four partial sums, and put the result into s */
-    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
-    sumdiffs = vec_splat(sumdiffs, 3);
-    vec_ste(sumdiffs, 0, &s);
-
-    return s;
-}
-
-int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
-{
-    int i;
-    DECLARE_ALIGNED_16(int, s);
-    const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0);
-    vector unsigned char *tv;
-    vector unsigned char pix1v, pix2v, pix3v, avgv, t5;
-    vector unsigned int sad;
-    vector signed int sumdiffs;
-    uint8_t *pix3 = pix2 + line_size;
-
-    s = 0;
-    sad = (vector unsigned int)vec_splat_u32(0);
-
-    /*
-       Since pix3 = pix2 + line_size, the pix3 of one
-       iteration becomes pix2 in the next iteration. We can use this
-       fact to avoid a potentially expensive unaligned read each
-       time around the loop.
-       Read unaligned pixels into our vector. The vector is as follows:
-       pix2v: pix2[0]-pix2[15]
-    */
-    tv = (vector unsigned char *) &pix2[0];
-    pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[0]));
-
-    for(i=0;i<h;i++) {
-        /*
-           Read unaligned pixels into our vectors. The vectors are as follows:
-           pix1v: pix1[0]-pix1[15]
-           pix3v: pix3[0]-pix3[15]
-        */
-        tv = (vector unsigned char *) pix1;
-        pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));
-
-        tv = (vector unsigned char *) &pix3[0];
-        pix3v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix3[0]));
-
-        /* Calculate the average vector */
-        avgv = vec_avg(pix2v, pix3v);
-
-        /* Calculate a sum of abs differences vector */
-        t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));
-
-        /* Add each 4 pixel group together and put 4 results into sad */
-        sad = vec_sum4s(t5, sad);
-
-        pix1 += line_size;
-        pix2v = pix3v;
-        pix3 += line_size;
-
-    }
-
-    /* Sum up the four partial sums, and put the result into s */
-    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
-    sumdiffs = vec_splat(sumdiffs, 3);
-    vec_ste(sumdiffs, 0, &s);
-    return s;
-}
-
-int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
-{
-    int i;
-    DECLARE_ALIGNED_16(int, s);
-    uint8_t *pix3 = pix2 + line_size;
-    const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0);
-    const_vector unsigned short two = (const_vector unsigned short)vec_splat_u16(2);
-    vector unsigned char *tv, avgv, t5;
-    vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv;
-    vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv;
-    vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv;
-    vector unsigned short avghv, avglv;
-    vector unsigned short t1, t2, t3, t4;
-    vector unsigned int sad;
-    vector signed int sumdiffs;
-
-    sad = (vector unsigned int)vec_splat_u32(0);
-
-    s = 0;
-
-    /*
-       Since pix3 = pix2 + line_size, the pix3 of one
-       iteration becomes pix2 in the next iteration. We can use this
-       fact to avoid a potentially expensive unaligned read, as well
-       as some splitting and vector additions, each time around the loop.
-       Read unaligned pixels into our vectors. The vectors are as follows:
-       pix2v: pix2[0]-pix2[15]  pix2iv: pix2[1]-pix2[16]
-       Split the pixel vectors into shorts.
-    */
-    tv = (vector unsigned char *) &pix2[0];
-    pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[0]));
-
-    tv = (vector unsigned char *) &pix2[1];
-    pix2iv = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[1]));
-
-    pix2hv = (vector unsigned short) vec_mergeh(zero, pix2v);
-    pix2lv = (vector unsigned short) vec_mergel(zero, pix2v);
-    pix2ihv = (vector unsigned short) vec_mergeh(zero, pix2iv);
-    pix2ilv = (vector unsigned short) vec_mergel(zero, pix2iv);
-    t1 = vec_add(pix2hv, pix2ihv);
-    t2 = vec_add(pix2lv, pix2ilv);
-
-    for(i=0;i<h;i++) {
-        /*
-           Read unaligned pixels into our vectors. The vectors are as follows:
-           pix1v: pix1[0]-pix1[15]
-           pix3v: pix3[0]-pix3[15]      pix3iv: pix3[1]-pix3[16]
-        */
-        tv = (vector unsigned char *) pix1;
-        pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));
-
-        tv = (vector unsigned char *) &pix3[0];
-        pix3v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix3[0]));
-
-        tv = (vector unsigned char *) &pix3[1];
-        pix3iv = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix3[1]));
-
-        /*
-          Note that AltiVec does have vec_avg, but it works on vector pairs
-          and rounds up. We could do avg(avg(a,b), avg(c,d)), but the rounding
-          would mean that, for example, avg(3,0,0,1) = 2, when it should be 1.
-          Instead, we have to split the pixel vectors into vectors of shorts
-          and do the averaging by hand.
-        */
-
-        /* Split the pixel vectors into shorts */
-        pix3hv = (vector unsigned short) vec_mergeh(zero, pix3v);
-        pix3lv = (vector unsigned short) vec_mergel(zero, pix3v);
-        pix3ihv = (vector unsigned short) vec_mergeh(zero, pix3iv);
-        pix3ilv = (vector unsigned short) vec_mergel(zero, pix3iv);
-
-        /* Do the averaging on them */
-        t3 = vec_add(pix3hv, pix3ihv);
-        t4 = vec_add(pix3lv, pix3ilv);
-
-        avghv = vec_sr(vec_add(vec_add(t1, t3), two), two);
-        avglv = vec_sr(vec_add(vec_add(t2, t4), two), two);
-
-        /* Pack the shorts back into a result */
-        avgv = vec_pack(avghv, avglv);
-
-        /* Calculate a sum of abs differences vector */
-        t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));
-
-        /* Add each 4 pixel group together and put 4 results into sad */
-        sad = vec_sum4s(t5, sad);
-
-        pix1 += line_size;
-        pix3 += line_size;
-        /* Transfer the calculated values for pix3 into pix2 */
-        t1 = t3;
-        t2 = t4;
-    }
-    /* Sum up the four partial sums, and put the result into s */
-    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
-    sumdiffs = vec_splat(sumdiffs, 3);
-    vec_ste(sumdiffs, 0, &s);
-
-    return s;
-}
-
-int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
-{
-    int i;
-    DECLARE_ALIGNED_16(int, s);
-    const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0);
-    vector unsigned char perm1, perm2, *pix1v, *pix2v;
-    vector unsigned char t1, t2, t3,t4, t5;
-    vector unsigned int sad;
-    vector signed int sumdiffs;
-
-    sad = (vector unsigned int)vec_splat_u32(0);
-
-
-    for(i=0;i<h;i++) {
-        /* Read potentially unaligned pixels into t1 and t2 */
-        perm1 = vec_lvsl(0, pix1);
-        pix1v = (vector unsigned char *) pix1;
-        perm2 = vec_lvsl(0, pix2);
-        pix2v = (vector unsigned char *) pix2;
-        t1 = vec_perm(pix1v[0], pix1v[1], perm1);
-        t2 = vec_perm(pix2v[0], pix2v[1], perm2);
-
-        /* Calculate a sum of abs differences vector */
-        t3 = vec_max(t1, t2);
-        t4 = vec_min(t1, t2);
-        t5 = vec_sub(t3, t4);
-
-        /* Add each 4 pixel group together and put 4 results into sad */
-        sad = vec_sum4s(t5, sad);
-
-        pix1 += line_size;
-        pix2 += line_size;
-    }
-
-    /* Sum up the four partial sums, and put the result into s */
-    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
-    sumdiffs = vec_splat(sumdiffs, 3);
-    vec_ste(sumdiffs, 0, &s);
-
-    return s;
-}
-
-int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
-{
-    int i;
-    DECLARE_ALIGNED_16(int, s);
-    const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0);
-    vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v;
-    vector unsigned char t1, t2, t3,t4, t5;
-    vector unsigned int sad;
-    vector signed int sumdiffs;
-
-    sad = (vector unsigned int)vec_splat_u32(0);
-
-    permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);
-
-    for(i=0;i<h;i++) {
-        /* Read potentially unaligned pixels into t1 and t2
-           Since we're reading 16 pixels, and actually only want 8,
-           mask out the last 8 pixels. The 0s don't change the sum. */
-        perm1 = vec_lvsl(0, pix1);
-        pix1v = (vector unsigned char *) pix1;
-        perm2 = vec_lvsl(0, pix2);
-        pix2v = (vector unsigned char *) pix2;
-        t1 = vec_and(vec_perm(pix1v[0], pix1v[1], perm1), permclear);
-        t2 = vec_and(vec_perm(pix2v[0], pix2v[1], perm2), permclear);
-
-        /* Calculate a sum of abs differences vector */
-        t3 = vec_max(t1, t2);
-        t4 = vec_min(t1, t2);
-        t5 = vec_sub(t3, t4);
-
-        /* Add each 4 pixel group together and put 4 results into sad */
-        sad = vec_sum4s(t5, sad);
-
-        pix1 += line_size;
-        pix2 += line_size;
-    }
-
-    /* Sum up the four partial sums, and put the result into s */
-    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
-    sumdiffs = vec_splat(sumdiffs, 3);
-    vec_ste(sumdiffs, 0, &s);
-
-    return s;
-}
-
-int pix_norm1_altivec(uint8_t *pix, int line_size)
-{
-    int i;
-    DECLARE_ALIGNED_16(int, s);
-    const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0);
-    vector unsigned char *tv;
-    vector unsigned char pixv;
-    vector unsigned int sv;
-    vector signed int sum;
-
-    sv = (vector unsigned int)vec_splat_u32(0);
-
-    s = 0;
-    for (i = 0; i < 16; i++) {
-        /* Read in the potentially unaligned pixels */
-        tv = (vector unsigned char *) pix;
-        pixv = vec_perm(tv[0], tv[1], vec_lvsl(0, pix));
-
-        /* Square the values, and add them to our sum */
-        sv = vec_msum(pixv, pixv, sv);
-
-        pix += line_size;
-    }
-    /* Sum up the four partial sums, and put the result into s */
-    sum = vec_sums((vector signed int) sv, (vector signed int) zero);
-    sum = vec_splat(sum, 3);
-    vec_ste(sum, 0, &s);
-
-    return s;
-}
-
 /**
- * Sum of Squared Errors for an 8x8 block.
- * AltiVec-enhanced.
- * It's the sad8_altivec code above w/ squaring added.
- */
-int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
-{
-    int i;
-    DECLARE_ALIGNED_16(int, s);
-    const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0);
-    vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v;
-    vector unsigned char t1, t2, t3,t4, t5;
-    vector unsigned int sum;
-    vector signed int sumsqr;
-
-    sum = (vector unsigned int)vec_splat_u32(0);
-
-    permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);
-
-
-    for(i=0;i<h;i++) {
-        /* Read potentially unaligned pixels into t1 and t2
-           Since we're reading 16 pixels, and actually only want 8,
-           mask out the last 8 pixels. The 0s don't change the sum. */
-        perm1 = vec_lvsl(0, pix1);
-        pix1v = (vector unsigned char *) pix1;
-        perm2 = vec_lvsl(0, pix2);
-        pix2v = (vector unsigned char *) pix2;
-        t1 = vec_and(vec_perm(pix1v[0], pix1v[1], perm1), permclear);
-        t2 = vec_and(vec_perm(pix2v[0], pix2v[1], perm2), permclear);
-
-        /*
-          Since we want to use unsigned chars, we can take advantage
-          of the fact that abs(a-b)^2 = (a-b)^2.
-        */
-
-        /* Calculate abs differences vector */
-        t3 = vec_max(t1, t2);
-        t4 = vec_min(t1, t2);
-        t5 = vec_sub(t3, t4);
-
-        /* Square the values and add them to our sum */
-        sum = vec_msum(t5, t5, sum);
-
-        pix1 += line_size;
-        pix2 += line_size;
-    }
-
-    /* Sum up the four partial sums, and put the result into s */
-    sumsqr = vec_sums((vector signed int) sum, (vector signed int) zero);
-    sumsqr = vec_splat(sumsqr, 3);
-    vec_ste(sumsqr, 0, &s);
-
-    return s;
-}
-
-/**
- * Sum of Squared Errors for a 16x16 block.
- * AltiVec-enhanced.
- * It's the sad16_altivec code above w/ squaring added.
+ * This function MAY rely on signal() or fork() in order to make sure AltiVec
+ * is present.
  */
-int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
-{
-    int i;
-    DECLARE_ALIGNED_16(int, s);
-    const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0);
-    vector unsigned char perm1, perm2, *pix1v, *pix2v;
-    vector unsigned char t1, t2, t3,t4, t5;
-    vector unsigned int sum;
-    vector signed int sumsqr;
-
-    sum = (vector unsigned int)vec_splat_u32(0);
-
-    for(i=0;i<h;i++) {
-        /* Read potentially unaligned pixels into t1 and t2 */
-        perm1 = vec_lvsl(0, pix1);
-        pix1v = (vector unsigned char *) pix1;
-        perm2 = vec_lvsl(0, pix2);
-        pix2v = (vector unsigned char *) pix2;
-        t1 = vec_perm(pix1v[0], pix1v[1], perm1);
-        t2 = vec_perm(pix2v[0], pix2v[1], perm2);
-
-        /*
-          Since we want to use unsigned chars, we can take advantage
-          of the fact that abs(a-b)^2 = (a-b)^2.
-        */
-
-        /* Calculate abs differences vector */
-        t3 = vec_max(t1, t2);
-        t4 = vec_min(t1, t2);
-        t5 = vec_sub(t3, t4);
-
-        /* Square the values and add them to our sum */
-        sum = vec_msum(t5, t5, sum);
-
-        pix1 += line_size;
-        pix2 += line_size;
-    }
-
-    /* Sum up the four partial sums, and put the result into s */
-    sumsqr = vec_sums((vector signed int) sum, (vector signed int) zero);
-    sumsqr = vec_splat(sumsqr, 3);
-    vec_ste(sumsqr, 0, &s);
-
-    return s;
-}
-
-int pix_sum_altivec(uint8_t * pix, int line_size)
-{
-    const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0);
-    vector unsigned char perm, *pixv;
-    vector unsigned char t1;
-    vector unsigned int sad;
-    vector signed int sumdiffs;
-
-    int i;
-    DECLARE_ALIGNED_16(int, s);
-
-    sad = (vector unsigned int)vec_splat_u32(0);
-
-    for (i = 0; i < 16; i++) {
-        /* Read the potentially unaligned 16 pixels into t1 */
-        perm = vec_lvsl(0, pix);
-        pixv = (vector unsigned char *) pix;
-        t1 = vec_perm(pixv[0], pixv[1], perm);
-
-        /* Add each 4 pixel group together and put 4 results into sad */
-        sad = vec_sum4s(t1, sad);
-
-        pix += line_size;
-    }
-
-    /* Sum up the four partial sums, and put the result into s */
-    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
-    sumdiffs = vec_splat(sumdiffs, 3);
-    vec_ste(sumdiffs, 0, &s);
-
-    return s;
-}
-
-void get_pixels_altivec(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
-{
-    int i;
-    vector unsigned char perm, bytes, *pixv;
-    const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0);
-    vector signed short shorts;
-
-    for(i=0;i<8;i++)
-    {
-        // Read potentially unaligned pixels.
-        // We're reading 16 pixels, and actually only want 8,
-        // but we simply ignore the extras.
-        perm = vec_lvsl(0, pixels);
-        pixv = (vector unsigned char *) pixels;
-        bytes = vec_perm(pixv[0], pixv[1], perm);
-
-        // convert the bytes into shorts
-        shorts = (vector signed short)vec_mergeh(zero, bytes);
-
-        // save the data to the block, we assume the block is 16-byte aligned
-        vec_st(shorts, i*16, (vector signed short*)block);
-
-        pixels += line_size;
-    }
-}
-
-void diff_pixels_altivec(DCTELEM *restrict block, const uint8_t *s1,
-        const uint8_t *s2, int stride)
-{
-    int i;
-    vector unsigned char perm, bytes, *pixv;
-    const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0);
-    vector signed short shorts1, shorts2;
-
-    for(i=0;i<4;i++)
-    {
-        // Read potentially unaligned pixels
-        // We're reading 16 pixels, and actually only want 8,
-        // but we simply ignore the extras.
-        perm = vec_lvsl(0, s1);
-        pixv = (vector unsigned char *) s1;
-        bytes = vec_perm(pixv[0], pixv[1], perm);
-
-        // convert the bytes into shorts
-        shorts1 = (vector signed short)vec_mergeh(zero, bytes);
-
-        // Do the same for the second block of pixels
-        perm = vec_lvsl(0, s2);
-        pixv = (vector unsigned char *) s2;
-        bytes = vec_perm(pixv[0], pixv[1], perm);
-
-        // convert the bytes into shorts
-        shorts2 = (vector signed short)vec_mergeh(zero, bytes);
-
-        // Do the subtraction
-        shorts1 = vec_sub(shorts1, shorts2);
-
-        // save the data to the block, we assume the block is 16-byte aligned
-        vec_st(shorts1, 0, (vector signed short*)block);
-
-        s1 += stride;
-        s2 += stride;
-        block += 8;
-
-
-        // The code below is a copy of the code above... This is a manual
-        // unroll.
-
-        // Read potentially unaligned pixels
-        // We're reading 16 pixels, and actually only want 8,
-        // but we simply ignore the extras.
-        perm = vec_lvsl(0, s1);
-        pixv = (vector unsigned char *) s1;
-        bytes = vec_perm(pixv[0], pixv[1], perm);
-
-        // convert the bytes into shorts
-        shorts1 = (vector signed short)vec_mergeh(zero, bytes);
-
-        // Do the same for the second block of pixels
-        perm = vec_lvsl(0, s2);
-        pixv = (vector unsigned char *) s2;
-        bytes = vec_perm(pixv[0], pixv[1], perm);
-
-        // convert the bytes into shorts
-        shorts2 = (vector signed short)vec_mergeh(zero, bytes);
-
-        // Do the subtraction
-        shorts1 = vec_sub(shorts1, shorts2);
-
-        // save the data to the block, we assume the block is 16-byte aligned
-        vec_st(shorts1, 0, (vector signed short*)block);
-
-        s1 += stride;
-        s2 += stride;
-        block += 8;
-    }
-}
-
-void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) {
-    register int i;
-    register vector unsigned char vdst, vsrc;
-
-    /* dst and src are 16-byte aligned (guaranteed) */
-    for(i = 0 ; (i + 15) < w ; i+=16)
-    {
-      vdst = vec_ld(i, (unsigned char*)dst);
-      vsrc = vec_ld(i, (unsigned char*)src);
-      vdst = vec_add(vsrc, vdst);
-      vec_st(vdst, i, (unsigned char*)dst);
-    }
-    /* if w is not a multiple of 16 */
-    for (; (i < w) ; i++)
-    {
-      dst[i] = src[i];
-    }
-}
-
-/* next one assumes that ((line_size % 16) == 0) */
-void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
-POWERPC_PERF_DECLARE(altivec_put_pixels16_num, 1);
-    register vector unsigned char pixelsv1, pixelsv2;
-    register vector unsigned char pixelsv1B, pixelsv2B;
-    register vector unsigned char pixelsv1C, pixelsv2C;
-    register vector unsigned char pixelsv1D, pixelsv2D;
-
-    register vector unsigned char perm = vec_lvsl(0, pixels);
-    int i;
-    register int line_size_2 = line_size << 1;
-    register int line_size_3 = line_size + line_size_2;
-    register int line_size_4 = line_size << 2;
-
-POWERPC_PERF_START_COUNT(altivec_put_pixels16_num, 1);
-// hand-unrolling the loop by 4 gains about 15%
-// minimum execution time goes from 74 to 60 cycles
-// it's faster than -funroll-loops, but using
-// -funroll-loops w/ this is bad - 74 cycles again.
-// all this is on a 7450, tuning for the 7450
-#if 0
-    for(i=0; i<h; i++) {
-      pixelsv1 = vec_ld(0, (unsigned char*)pixels);
-      pixelsv2 = vec_ld(16, (unsigned char*)pixels);
-      vec_st(vec_perm(pixelsv1, pixelsv2, perm),
-             0, (unsigned char*)block);
-      pixels+=line_size;
-      block +=line_size;
-    }
-#else
-    for(i=0; i<h; i+=4) {
-      pixelsv1 = vec_ld(0, (unsigned char*)pixels);
-      pixelsv2 = vec_ld(15, (unsigned char*)pixels);
-      pixelsv1B = vec_ld(line_size, (unsigned char*)pixels);
-      pixelsv2B = vec_ld(15 + line_size, (unsigned char*)pixels);
-      pixelsv1C = vec_ld(line_size_2, (unsigned char*)pixels);
-      pixelsv2C = vec_ld(15 + line_size_2, (unsigned char*)pixels);
-      pixelsv1D = vec_ld(line_size_3, (unsigned char*)pixels);
-      pixelsv2D = vec_ld(15 + line_size_3, (unsigned char*)pixels);
-      vec_st(vec_perm(pixelsv1, pixelsv2, perm),
-             0, (unsigned char*)block);
-      vec_st(vec_perm(pixelsv1B, pixelsv2B, perm),
-             line_size, (unsigned char*)block);
-      vec_st(vec_perm(pixelsv1C, pixelsv2C, perm),
-             line_size_2, (unsigned char*)block);
-      vec_st(vec_perm(pixelsv1D, pixelsv2D, perm),
-             line_size_3, (unsigned char*)block);
-      pixels+=line_size_4;
-      block +=line_size_4;
-    }
-#endif
-POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_num, 1);
-}
-
-/* next one assumes that ((line_size % 16) == 0) */
-#define op_avg(a,b)  a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
-void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
-POWERPC_PERF_DECLARE(altivec_avg_pixels16_num, 1);
-    register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
-    register vector unsigned char perm = vec_lvsl(0, pixels);
-    int i;
-
-POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1);
-
-    for(i=0; i<h; i++) {
-      pixelsv1 = vec_ld(0, (unsigned char*)pixels);
-      pixelsv2 = vec_ld(16, (unsigned char*)pixels);
-      blockv = vec_ld(0, block);
-      pixelsv = vec_perm(pixelsv1, pixelsv2, perm);
-      blockv = vec_avg(blockv,pixelsv);
-      vec_st(blockv, 0, (unsigned char*)block);
-      pixels+=line_size;
-      block +=line_size;
-    }
-
-POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1);
-}
-
-/* next one assumes that ((line_size % 8) == 0) */
-void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
-{
-POWERPC_PERF_DECLARE(altivec_avg_pixels8_num, 1);
-    register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
-    int i;
-
-POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1);
-
-   for (i = 0; i < h; i++) {
-     /*
-       block is 8-byte aligned, so we're either in the
-       left block (16-byte aligned) or in the right block (not)
-     */
-     int rightside = ((unsigned long)block & 0x0000000F);
-
-     blockv = vec_ld(0, block);
-     pixelsv1 = vec_ld(0, (unsigned char*)pixels);
-     pixelsv2 = vec_ld(16, (unsigned char*)pixels);
-     pixelsv = vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels));
-
-     if (rightside)
-     {
-       pixelsv = vec_perm(blockv, pixelsv, vcprm(0,1,s0,s1));
-     }
-     else
-     {
-       pixelsv = vec_perm(blockv, pixelsv, vcprm(s0,s1,2,3));
-     }
-
-     blockv = vec_avg(blockv, pixelsv);
-
-     vec_st(blockv, 0, block);
-
-     pixels += line_size;
-     block += line_size;
-   }
-
-POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1);
-}
-
-/* next one assumes that ((line_size % 8) == 0) */
-void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
-POWERPC_PERF_DECLARE(altivec_put_pixels8_xy2_num, 1);
-   register int i;
-   register vector unsigned char
-     pixelsv1, pixelsv2,
-     pixelsavg;
-   register vector unsigned char
-     blockv, temp1, temp2;
-   register vector unsigned short
-     pixelssum1, pixelssum2, temp3;
-   register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
-   register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2);
-
-   temp1 = vec_ld(0, pixels);
-   temp2 = vec_ld(16, pixels);
-   pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
-   if ((((unsigned long)pixels) & 0x0000000F) ==  0x0000000F)
-   {
-     pixelsv2 = temp2;
-   }
-   else
-   {
-     pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
-   }
-   pixelsv1 = vec_mergeh(vczero, pixelsv1);
-   pixelsv2 = vec_mergeh(vczero, pixelsv2);
-   pixelssum1 = vec_add((vector unsigned short)pixelsv1,
-                        (vector unsigned short)pixelsv2);
-   pixelssum1 = vec_add(pixelssum1, vctwo);
-
-POWERPC_PERF_START_COUNT(altivec_put_pixels8_xy2_num, 1);
-   for (i = 0; i < h ; i++) {
-     int rightside = ((unsigned long)block & 0x0000000F);
-     blockv = vec_ld(0, block);
-
-     temp1 = vec_ld(line_size, pixels);
-     temp2 = vec_ld(line_size + 16, pixels);
-     pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
-     if (((((unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F)
-     {
-       pixelsv2 = temp2;
-     }
-     else
-     {
-       pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
-     }
-
-     pixelsv1 = vec_mergeh(vczero, pixelsv1);
-     pixelsv2 = vec_mergeh(vczero, pixelsv2);
-     pixelssum2 = vec_add((vector unsigned short)pixelsv1,
-                          (vector unsigned short)pixelsv2);
-     temp3 = vec_add(pixelssum1, pixelssum2);
-     temp3 = vec_sra(temp3, vctwo);
-     pixelssum1 = vec_add(pixelssum2, vctwo);
-     pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);
-
-     if (rightside)
-     {
-       blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
-     }
-     else
-     {
-       blockv = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
-     }
-
-     vec_st(blockv, 0, block);
-
-     block += line_size;
-     pixels += line_size;
-   }
-
-POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
-}
-
-/* next one assumes that ((line_size % 8) == 0) */
-void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
-POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1);
-   register int i;
-   register vector unsigned char
-     pixelsv1, pixelsv2,
-     pixelsavg;
-   register vector unsigned char
-     blockv, temp1, temp2;
-   register vector unsigned short
-     pixelssum1, pixelssum2, temp3;
-   register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
-   register const_vector unsigned short vcone = (const_vector unsigned short)vec_splat_u16(1);
-   register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2);
-
-   temp1 = vec_ld(0, pixels);
-   temp2 = vec_ld(16, pixels);
-   pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
-   if ((((unsigned long)pixels) & 0x0000000F) ==  0x0000000F)
-   {
-     pixelsv2 = temp2;
-   }
-   else
-   {
-     pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
-   }
-   pixelsv1 = vec_mergeh(vczero, pixelsv1);
-   pixelsv2 = vec_mergeh(vczero, pixelsv2);
-   pixelssum1 = vec_add((vector unsigned short)pixelsv1,
-                        (vector unsigned short)pixelsv2);
-   pixelssum1 = vec_add(pixelssum1, vcone);
-
-POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
-   for (i = 0; i < h ; i++) {
-     int rightside = ((unsigned long)block & 0x0000000F);
-     blockv = vec_ld(0, block);
-
-     temp1 = vec_ld(line_size, pixels);
-     temp2 = vec_ld(line_size + 16, pixels);
-     pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
-     if (((((unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F)
-     {
-       pixelsv2 = temp2;
-     }
-     else
-     {
-       pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
-     }
-
-     pixelsv1 = vec_mergeh(vczero, pixelsv1);
-     pixelsv2 = vec_mergeh(vczero, pixelsv2);
-     pixelssum2 = vec_add((vector unsigned short)pixelsv1,
-                          (vector unsigned short)pixelsv2);
-     temp3 = vec_add(pixelssum1, pixelssum2);
-     temp3 = vec_sra(temp3, vctwo);
-     pixelssum1 = vec_add(pixelssum2, vcone);
-     pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);
-
-     if (rightside)
-     {
-       blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
-     }
-     else
-     {
-       blockv = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
-     }
-
-     vec_st(blockv, 0, block);
-
-     block += line_size;
-     pixels += line_size;
-   }
-
-POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
-}
-
-/* next one assumes that ((line_size % 16) == 0) */
-void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
-{
-POWERPC_PERF_DECLARE(altivec_put_pixels16_xy2_num, 1);
-   register int i;
-   register vector unsigned char
-     pixelsv1, pixelsv2, pixelsv3, pixelsv4;
-   register vector unsigned char
-     blockv, temp1, temp2;
-   register vector unsigned short
-     pixelssum1, pixelssum2, temp3,
-     pixelssum3, pixelssum4, temp4;
-   register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
-   register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2);
-
-POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1);
-
-   temp1 = vec_ld(0, pixels);
-   temp2 = vec_ld(16, pixels);
-   pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
-   if ((((unsigned long)pixels) & 0x0000000F) ==  0x0000000F)
-   {
-     pixelsv2 = temp2;
-   }
-   else
-   {
-     pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
-   }
-   pixelsv3 = vec_mergel(vczero, pixelsv1);
-   pixelsv4 = vec_mergel(vczero, pixelsv2);
-   pixelsv1 = vec_mergeh(vczero, pixelsv1);
-   pixelsv2 = vec_mergeh(vczero, pixelsv2);
-   pixelssum3 = vec_add((vector unsigned short)pixelsv3,
-                        (vector unsigned short)pixelsv4);
-   pixelssum3 = vec_add(pixelssum3, vctwo);
-   pixelssum1 = vec_add((vector unsigned short)pixelsv1,
-                        (vector unsigned short)pixelsv2);
-   pixelssum1 = vec_add(pixelssum1, vctwo);
-
-   for (i = 0; i < h ; i++) {
-     blockv = vec_ld(0, block);
-
-     temp1 = vec_ld(line_size, pixels);
-     temp2 = vec_ld(line_size + 16, pixels);
-     pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
-     if (((((unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F)
-     {
-       pixelsv2 = temp2;
-     }
-     else
-     {
-       pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
-     }
-
-     pixelsv3 = vec_mergel(vczero, pixelsv1);
-     pixelsv4 = vec_mergel(vczero, pixelsv2);
-     pixelsv1 = vec_mergeh(vczero, pixelsv1);
-     pixelsv2 = vec_mergeh(vczero, pixelsv2);
-
-     pixelssum4 = vec_add((vector unsigned short)pixelsv3,
-                          (vector unsigned short)pixelsv4);
-     pixelssum2 = vec_add((vector unsigned short)pixelsv1,
-                          (vector unsigned short)pixelsv2);
-     temp4 = vec_add(pixelssum3, pixelssum4);
-     temp4 = vec_sra(temp4, vctwo);
-     temp3 = vec_add(pixelssum1, pixelssum2);
-     temp3 = vec_sra(temp3, vctwo);
-
-     pixelssum3 = vec_add(pixelssum4, vctwo);
-     pixelssum1 = vec_add(pixelssum2, vctwo);
-
-     blockv = vec_packsu(temp3, temp4);
-
-     vec_st(blockv, 0, block);
-
-     block += line_size;
-     pixels += line_size;
-   }
-
-POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
-}
-
-/* next one assumes that ((line_size % 16) == 0) */
-void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
-{
-POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1);
-   register int i;
-   register vector unsigned char
-     pixelsv1, pixelsv2, pixelsv3, pixelsv4;
-   register vector unsigned char
-     blockv, temp1, temp2;
-   register vector unsigned short
-     pixelssum1, pixelssum2, temp3,
-     pixelssum3, pixelssum4, temp4;
-   register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
-   register const_vector unsigned short vcone = (const_vector unsigned short)vec_splat_u16(1);
-   register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2);
-
-POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
-
-   temp1 = vec_ld(0, pixels);
-   temp2 = vec_ld(16, pixels);
-   pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
-   if ((((unsigned long)pixels) & 0x0000000F) ==  0x0000000F)
-   {
-     pixelsv2 = temp2;
-   }
-   else
-   {
-     pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
-   }
-   pixelsv3 = vec_mergel(vczero, pixelsv1);
-   pixelsv4 = vec_mergel(vczero, pixelsv2);
-   pixelsv1 = vec_mergeh(vczero, pixelsv1);
-   pixelsv2 = vec_mergeh(vczero, pixelsv2);
-   pixelssum3 = vec_add((vector unsigned short)pixelsv3,
-                        (vector unsigned short)pixelsv4);
-   pixelssum3 = vec_add(pixelssum3, vcone);
-   pixelssum1 = vec_add((vector unsigned short)pixelsv1,
-                        (vector unsigned short)pixelsv2);
-   pixelssum1 = vec_add(pixelssum1, vcone);
-
-   for (i = 0; i < h ; i++) {
-     blockv = vec_ld(0, block);
-
-     temp1 = vec_ld(line_size, pixels);
-     temp2 = vec_ld(line_size + 16, pixels);
-     pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
-     if (((((unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F)
-     {
-       pixelsv2 = temp2;
-     }
-     else
-     {
-       pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
-     }
-
-     pixelsv3 = vec_mergel(vczero, pixelsv1);
-     pixelsv4 = vec_mergel(vczero, pixelsv2);
-     pixelsv1 = vec_mergeh(vczero, pixelsv1);
-     pixelsv2 = vec_mergeh(vczero, pixelsv2);
-
-     pixelssum4 = vec_add((vector unsigned short)pixelsv3,
-                          (vector unsigned short)pixelsv4);
-     pixelssum2 = vec_add((vector unsigned short)pixelsv1,
-                          (vector unsigned short)pixelsv2);
-     temp4 = vec_add(pixelssum3, pixelssum4);
-     temp4 = vec_sra(temp4, vctwo);
-     temp3 = vec_add(pixelssum1, pixelssum2);
-     temp3 = vec_sra(temp3, vctwo);
-
-     pixelssum3 = vec_add(pixelssum4, vcone);
-     pixelssum1 = vec_add(pixelssum2, vcone);
-
-     blockv = vec_packsu(temp3, temp4);
-
-     vec_st(blockv, 0, block);
-
-     block += line_size;
-     pixels += line_size;
-   }
-
-POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
-}
-
-int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
-POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1);
-    int sum;
-    register const_vector unsigned char vzero =
-                            (const_vector unsigned char)vec_splat_u8(0);
-    register vector signed short temp0, temp1, temp2, temp3, temp4,
-                                 temp5, temp6, temp7;
-POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1);
-  {
-    register const_vector signed short vprod1 =(const_vector signed short)
-                                        AVV( 1,-1, 1,-1, 1,-1, 1,-1);
-    register const_vector signed short vprod2 =(const_vector signed short)
-                                        AVV( 1, 1,-1,-1, 1, 1,-1,-1);
-    register const_vector signed short vprod3 =(const_vector signed short)
-                                        AVV( 1, 1, 1, 1,-1,-1,-1,-1);
-    register const_vector unsigned char perm1 = (const_vector unsigned char)
-      AVV(0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05,
-          0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D);
-    register const_vector unsigned char perm2 = (const_vector unsigned char)
-      AVV(0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
-          0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B);
-    register const_vector unsigned char perm3 = (const_vector unsigned char)
-      AVV(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
-          0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
-
-#define ONEITERBUTTERFLY(i, res)                                        \
-    {                                                                   \
-      register vector unsigned char src1, src2, srcO;                   \
-      register vector unsigned char dst1, dst2, dstO;                   \
-      register vector signed short srcV, dstV;                          \
-      register vector signed short but0, but1, but2, op1, op2, op3;     \
-      src1 = vec_ld(stride * i, src);                                   \
-      src2 = vec_ld((stride * i) + 15, src);                            \
-      srcO = vec_perm(src1, src2, vec_lvsl(stride * i, src));           \
-      dst1 = vec_ld(stride * i, dst);                                   \
-      dst2 = vec_ld((stride * i) + 15, dst);                            \
-      dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst));           \
-      /* promote the unsigned chars to signed shorts */                 \
-      /* we're in the 8x8 function, we only care for the first 8 */     \
-      srcV =                                                            \
-        (vector signed short)vec_mergeh((vector signed char)vzero,      \
-        (vector signed char)srcO);                                      \
-      dstV =                                                            \
-        (vector signed short)vec_mergeh((vector signed char)vzero,      \
-        (vector signed char)dstO);                                      \
-      /* subtractions inside the first butterfly */                     \
-      but0 = vec_sub(srcV, dstV);                                       \
-      op1 = vec_perm(but0, but0, perm1);                                \
-      but1 = vec_mladd(but0, vprod1, op1);                              \
-      op2 = vec_perm(but1, but1, perm2);                                \
-      but2 = vec_mladd(but1, vprod2, op2);                              \
-      op3 = vec_perm(but2, but2, perm3);                                \
-      res = vec_mladd(but2, vprod3, op3);                               \
-    }
-    ONEITERBUTTERFLY(0, temp0);
-    ONEITERBUTTERFLY(1, temp1);
-    ONEITERBUTTERFLY(2, temp2);
-    ONEITERBUTTERFLY(3, temp3);
-    ONEITERBUTTERFLY(4, temp4);
-    ONEITERBUTTERFLY(5, temp5);
-    ONEITERBUTTERFLY(6, temp6);
-    ONEITERBUTTERFLY(7, temp7);
-  }
-#undef ONEITERBUTTERFLY
-  {
-    register vector signed int vsum;
-    register vector signed short line0 = vec_add(temp0, temp1);
-    register vector signed short line1 = vec_sub(temp0, temp1);
-    register vector signed short line2 = vec_add(temp2, temp3);
-    register vector signed short line3 = vec_sub(temp2, temp3);
-    register vector signed short line4 = vec_add(temp4, temp5);
-    register vector signed short line5 = vec_sub(temp4, temp5);
-    register vector signed short line6 = vec_add(temp6, temp7);
-    register vector signed short line7 = vec_sub(temp6, temp7);
-
-    register vector signed short line0B = vec_add(line0, line2);
-    register vector signed short line2B = vec_sub(line0, line2);
-    register vector signed short line1B = vec_add(line1, line3);
-    register vector signed short line3B = vec_sub(line1, line3);
-    register vector signed short line4B = vec_add(line4, line6);
-    register vector signed short line6B = vec_sub(line4, line6);
-    register vector signed short line5B = vec_add(line5, line7);
-    register vector signed short line7B = vec_sub(line5, line7);
-
-    register vector signed short line0C = vec_add(line0B, line4B);
-    register vector signed short line4C = vec_sub(line0B, line4B);
-    register vector signed short line1C = vec_add(line1B, line5B);
-    register vector signed short line5C = vec_sub(line1B, line5B);
-    register vector signed short line2C = vec_add(line2B, line6B);
-    register vector signed short line6C = vec_sub(line2B, line6B);
-    register vector signed short line3C = vec_add(line3B, line7B);
-    register vector signed short line7C = vec_sub(line3B, line7B);
-
-    vsum = vec_sum4s(vec_abs(line0C), vec_splat_s32(0));
-    vsum = vec_sum4s(vec_abs(line1C), vsum);
-    vsum = vec_sum4s(vec_abs(line2C), vsum);
-    vsum = vec_sum4s(vec_abs(line3C), vsum);
-    vsum = vec_sum4s(vec_abs(line4C), vsum);
-    vsum = vec_sum4s(vec_abs(line5C), vsum);
-    vsum = vec_sum4s(vec_abs(line6C), vsum);
-    vsum = vec_sum4s(vec_abs(line7C), vsum);
-    vsum = vec_sums(vsum, (vector signed int)vzero);
-    vsum = vec_splat(vsum, 3);
-    vec_ste(vsum, 0, &sum);
-  }
-POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff8x8_num, 1);
-  return sum;
-}
-
-/*
-  16x8 works with 16 elements; it avoids replicating
-  loads and gives the compiler more room for scheduling.
-  It's only used from inside hadamard8_diff16_altivec.
-
-  Unfortunately, gcc-3.3 seems to be a bit dumb, and
-  the compiled code contains a LOT of spill code; it seems
-  gcc (unlike xlc) cannot keep everything in registers
-  by itself. The following code includes hand-made
-  register allocation. It's not clean, but on
-  a 7450 the resulting code is much faster (the best case
-  falls from 700+ cycles to 550).
-
-  xlc doesn't add spill code, but it doesn't know how to
-  schedule for the 7450, and its code isn't much faster than
-  gcc-3.3's on the 7450 (though it uses 25% fewer instructions...)
-
-  On the 970, the hand-made RA is still a win (around 690
-  vs. around 780), but xlc goes down to around 660 on the
-  regular C code...
-*/
-
-static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h) {
-    int sum;
-    register vector signed short
-        temp0 REG_v(v0),
-        temp1 REG_v(v1),
-        temp2 REG_v(v2),
-        temp3 REG_v(v3),
-        temp4 REG_v(v4),
-        temp5 REG_v(v5),
-        temp6 REG_v(v6),
-        temp7 REG_v(v7);
-    register vector signed short
-        temp0S REG_v(v8),
-        temp1S REG_v(v9),
-        temp2S REG_v(v10),
-        temp3S REG_v(v11),
-        temp4S REG_v(v12),
-        temp5S REG_v(v13),
-        temp6S REG_v(v14),
-        temp7S REG_v(v15);
-    register const_vector unsigned char vzero REG_v(v31)=
-        (const_vector unsigned char)vec_splat_u8(0);
-  {
-    register const_vector signed short vprod1 REG_v(v16)=
-        (const_vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1);
-    register const_vector signed short vprod2 REG_v(v17)=
-        (const_vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1);
-    register const_vector signed short vprod3 REG_v(v18)=
-        (const_vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1);
-    register const_vector unsigned char perm1 REG_v(v19)=
-        (const_vector unsigned char)
-        AVV(0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05,
-            0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D);
-    register const_vector unsigned char perm2 REG_v(v20)=
-        (const_vector unsigned char)
-        AVV(0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
-            0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B);
-    register const_vector unsigned char perm3 REG_v(v21)=
-        (const_vector unsigned char)
-        AVV(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
-            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
-
-#define ONEITERBUTTERFLY(i, res1, res2)                                 \
-    {                                                                   \
-      register vector unsigned char src1 REG_v(v22),                    \
-                                    src2 REG_v(v23),                    \
-                                    dst1 REG_v(v24),                    \
-                                    dst2 REG_v(v25),                    \
-                                    srcO REG_v(v22),                    \
-                                    dstO REG_v(v23);                    \
-                                                                        \
-      register vector signed short  srcV REG_v(v24),                    \
-                                    dstV REG_v(v25),                    \
-                                    srcW REG_v(v26),                    \
-                                    dstW REG_v(v27),                    \
-                                    but0 REG_v(v28),                    \
-                                    but0S REG_v(v29),                   \
-                                    op1 REG_v(v30),                     \
-                                    but1 REG_v(v22),                    \
-                                    op1S REG_v(v23),                    \
-                                    but1S REG_v(v24),                   \
-                                    op2 REG_v(v25),                     \
-                                    but2 REG_v(v26),                    \
-                                    op2S REG_v(v27),                    \
-                                    but2S REG_v(v28),                   \
-                                    op3 REG_v(v29),                     \
-                                    op3S REG_v(v30);                    \
-                                                                        \
-      src1 = vec_ld(stride * i, src);                                   \
-      src2 = vec_ld((stride * i) + 16, src);                            \
-      srcO = vec_perm(src1, src2, vec_lvsl(stride * i, src));           \
-      dst1 = vec_ld(stride * i, dst);                                   \
-      dst2 = vec_ld((stride * i) + 16, dst);                            \
-      dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst));           \
-      /* promote the unsigned chars to signed shorts */                 \
-      srcV =                                                            \
-        (vector signed short)vec_mergeh((vector signed char)vzero,      \
-        (vector signed char)srcO);                                      \
-      dstV =                                                            \
-        (vector signed short)vec_mergeh((vector signed char)vzero,      \
-        (vector signed char)dstO);                                      \
-      srcW =                                                            \
-        (vector signed short)vec_mergel((vector signed char)vzero,      \
-        (vector signed char)srcO);                                      \
-      dstW =                                                            \
-        (vector signed short)vec_mergel((vector signed char)vzero,      \
-        (vector signed char)dstO);                                      \
-      /* subtractions inside the first butterfly */                     \
-      but0 = vec_sub(srcV, dstV);                                       \
-      but0S = vec_sub(srcW, dstW);                                      \
-      op1 = vec_perm(but0, but0, perm1);                                \
-      but1 = vec_mladd(but0, vprod1, op1);                              \
-      op1S = vec_perm(but0S, but0S, perm1);                             \
-      but1S = vec_mladd(but0S, vprod1, op1S);                           \
-      op2 = vec_perm(but1, but1, perm2);                                \
-      but2 = vec_mladd(but1, vprod2, op2);                              \
-      op2S = vec_perm(but1S, but1S, perm2);                             \
-      but2S = vec_mladd(but1S, vprod2, op2S);                           \
-      op3 = vec_perm(but2, but2, perm3);                                \
-      res1 = vec_mladd(but2, vprod3, op3);                              \
-      op3S = vec_perm(but2S, but2S, perm3);                             \
-      res2 = vec_mladd(but2S, vprod3, op3S);                            \
-    }
-    ONEITERBUTTERFLY(0, temp0, temp0S);
-    ONEITERBUTTERFLY(1, temp1, temp1S);
-    ONEITERBUTTERFLY(2, temp2, temp2S);
-    ONEITERBUTTERFLY(3, temp3, temp3S);
-    ONEITERBUTTERFLY(4, temp4, temp4S);
-    ONEITERBUTTERFLY(5, temp5, temp5S);
-    ONEITERBUTTERFLY(6, temp6, temp6S);
-    ONEITERBUTTERFLY(7, temp7, temp7S);
-  }
-#undef ONEITERBUTTERFLY
-  {
-    register vector signed int vsum;
-    register vector signed short line0S, line1S, line2S, line3S, line4S,
-                                 line5S, line6S, line7S, line0BS,line2BS,
-                                 line1BS,line3BS,line4BS,line6BS,line5BS,
-                                 line7BS,line0CS,line4CS,line1CS,line5CS,
-                                 line2CS,line6CS,line3CS,line7CS;
-
-    register vector signed short line0 = vec_add(temp0, temp1);
-    register vector signed short line1 = vec_sub(temp0, temp1);
-    register vector signed short line2 = vec_add(temp2, temp3);
-    register vector signed short line3 = vec_sub(temp2, temp3);
-    register vector signed short line4 = vec_add(temp4, temp5);
-    register vector signed short line5 = vec_sub(temp4, temp5);
-    register vector signed short line6 = vec_add(temp6, temp7);
-    register vector signed short line7 = vec_sub(temp6, temp7);
-
-    register vector signed short line0B = vec_add(line0, line2);
-    register vector signed short line2B = vec_sub(line0, line2);
-    register vector signed short line1B = vec_add(line1, line3);
-    register vector signed short line3B = vec_sub(line1, line3);
-    register vector signed short line4B = vec_add(line4, line6);
-    register vector signed short line6B = vec_sub(line4, line6);
-    register vector signed short line5B = vec_add(line5, line7);
-    register vector signed short line7B = vec_sub(line5, line7);
-
-    register vector signed short line0C = vec_add(line0B, line4B);
-    register vector signed short line4C = vec_sub(line0B, line4B);
-    register vector signed short line1C = vec_add(line1B, line5B);
-    register vector signed short line5C = vec_sub(line1B, line5B);
-    register vector signed short line2C = vec_add(line2B, line6B);
-    register vector signed short line6C = vec_sub(line2B, line6B);
-    register vector signed short line3C = vec_add(line3B, line7B);
-    register vector signed short line7C = vec_sub(line3B, line7B);
-
-    vsum = vec_sum4s(vec_abs(line0C), vec_splat_s32(0));
-    vsum = vec_sum4s(vec_abs(line1C), vsum);
-    vsum = vec_sum4s(vec_abs(line2C), vsum);
-    vsum = vec_sum4s(vec_abs(line3C), vsum);
-    vsum = vec_sum4s(vec_abs(line4C), vsum);
-    vsum = vec_sum4s(vec_abs(line5C), vsum);
-    vsum = vec_sum4s(vec_abs(line6C), vsum);
-    vsum = vec_sum4s(vec_abs(line7C), vsum);
-
-    line0S = vec_add(temp0S, temp1S);
-    line1S = vec_sub(temp0S, temp1S);
-    line2S = vec_add(temp2S, temp3S);
-    line3S = vec_sub(temp2S, temp3S);
-    line4S = vec_add(temp4S, temp5S);
-    line5S = vec_sub(temp4S, temp5S);
-    line6S = vec_add(temp6S, temp7S);
-    line7S = vec_sub(temp6S, temp7S);
-
-    line0BS = vec_add(line0S, line2S);
-    line2BS = vec_sub(line0S, line2S);
-    line1BS = vec_add(line1S, line3S);
-    line3BS = vec_sub(line1S, line3S);
-    line4BS = vec_add(line4S, line6S);
-    line6BS = vec_sub(line4S, line6S);
-    line5BS = vec_add(line5S, line7S);
-    line7BS = vec_sub(line5S, line7S);
-
-    line0CS = vec_add(line0BS, line4BS);
-    line4CS = vec_sub(line0BS, line4BS);
-    line1CS = vec_add(line1BS, line5BS);
-    line5CS = vec_sub(line1BS, line5BS);
-    line2CS = vec_add(line2BS, line6BS);
-    line6CS = vec_sub(line2BS, line6BS);
-    line3CS = vec_add(line3BS, line7BS);
-    line7CS = vec_sub(line3BS, line7BS);
-
-    vsum = vec_sum4s(vec_abs(line0CS), vsum);
-    vsum = vec_sum4s(vec_abs(line1CS), vsum);
-    vsum = vec_sum4s(vec_abs(line2CS), vsum);
-    vsum = vec_sum4s(vec_abs(line3CS), vsum);
-    vsum = vec_sum4s(vec_abs(line4CS), vsum);
-    vsum = vec_sum4s(vec_abs(line5CS), vsum);
-    vsum = vec_sum4s(vec_abs(line6CS), vsum);
-    vsum = vec_sum4s(vec_abs(line7CS), vsum);
-    vsum = vec_sums(vsum, (vector signed int)vzero);
-    vsum = vec_splat(vsum, 3);
-    vec_ste(vsum, 0, &sum);
-  }
-  return sum;
-}
-
-int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
-POWERPC_PERF_DECLARE(altivec_hadamard8_diff16_num, 1);
-    int score;
-POWERPC_PERF_START_COUNT(altivec_hadamard8_diff16_num, 1);
-    score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8);
-    if (h==16) {
-        dst += 8*stride;
-        src += 8*stride;
-        score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8);
-    }
-POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1);
-    return score;
-}
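
The pair of functions above is easier to follow against a scalar model:
they compute an 8x8 Walsh-Hadamard transform of the src/dst difference
and sum the absolute coefficients (a SATD metric); the 16-wide variant
simply carries two 8x8 halves (temp0..7 and temp0S..7S) through the same
butterflies at once. The sketch below is an illustrative reading of the
vector code, not part of the patch; per-stage sign conventions differ,
but that cancels under the absolute value.

    #include <stdint.h>
    #include <stdlib.h>

    /* Illustrative scalar model of hadamard8_diff8x8: 8x8 WHT of the
     * difference block, then the sum of absolute coefficients. */
    static int hadamard8_diff_ref(const uint8_t *dst, const uint8_t *src,
                                  int stride)
    {
        int d[8][8];
        int x, y, s, a, b, sum = 0;

        for (y = 0; y < 8; y++)
            for (x = 0; x < 8; x++)
                d[y][x] = src[y * stride + x] - dst[y * stride + x];

        /* three butterfly stages along rows (the perm1/perm2/perm3 +
         * vec_mladd pairs), then three along columns (the lineN sums) */
        for (y = 0; y < 8; y++)
            for (s = 1; s < 8; s <<= 1)
                for (x = 0; x < 8; x++)
                    if (!(x & s)) {
                        a = d[y][x];
                        b = d[y][x + s];
                        d[y][x]     = a + b;
                        d[y][x + s] = a - b;
                    }
        for (x = 0; x < 8; x++)
            for (s = 1; s < 8; s <<= 1)
                for (y = 0; y < 8; y++)
                    if (!(y & s)) {
                        a = d[y][x];
                        b = d[y + s][x];
                        d[y][x]     = a + b;
                        d[y + s][x] = a - b;
                    }

        for (y = 0; y < 8; y++)
            for (x = 0; x < 8; x++)
                sum += abs(d[y][x]);
        return sum;
    }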
 
 int has_altivec(void)
 {
@@ -1458,127 +93,3 @@ int has_altivec(void)
 #endif /* __AMIGAOS4__ */
 }
 
-static void vorbis_inverse_coupling_altivec(float *mag, float *ang,
-                                            int blocksize)
-{
-    int i;
-    vector float m, a;
-    vector bool int t0, t1;
-    const vector unsigned int v_31 = //XXX
-        vec_add(vec_add(vec_splat_u32(15),vec_splat_u32(15)),vec_splat_u32(1));
-    for(i=0; i<blocksize; i+=4) {
-        m = vec_ld(0, mag+i);
-        a = vec_ld(0, ang+i);
-        t0 = vec_cmple(m, (vector float)vec_splat_u32(0));
-        t1 = vec_cmple(a, (vector float)vec_splat_u32(0));
-        a = vec_xor(a, (vector float) vec_sl((vector unsigned int)t0, v_31));
-        t0 = (vector bool int)vec_and(a, t1);
-        t1 = (vector bool int)vec_andc(a, t1);
-        a = vec_sub(m, (vector float)t1);
-        m = vec_add(m, (vector float)t0);
-        vec_stl(a, 0, ang+i);
-        vec_stl(m, 0, mag+i);
-    }
-}
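
The mask arithmetic above is a branch-free rewrite of the four sign
cases of Vorbis magnitude/angle coupling: t0 captures mag <= 0 (used to
flip the sign of ang), t1 captures ang <= 0. Working those cases back
out gives the following scalar equivalent (an illustrative
reconstruction, not part of the patch):

    /* Scalar model of the coupling implemented branch-free above */
    static void vorbis_inverse_coupling_ref(float *mag, float *ang,
                                            int blocksize)
    {
        int i;
        for (i = 0; i < blocksize; i++) {
            if (mag[i] > 0.0) {
                if (ang[i] > 0.0) {
                    ang[i] = mag[i] - ang[i];
                } else {
                    float temp = ang[i];
                    ang[i]  = mag[i];
                    mag[i] += temp;
                }
            } else {
                if (ang[i] > 0.0) {
                    ang[i] += mag[i];
                } else {
                    float temp = ang[i];
                    ang[i]  = mag[i];
                    mag[i] -= temp;
                }
            }
        }
    }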
-
-/* next one assumes that ((line_size % 8) == 0) */
-void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
-POWERPC_PERF_DECLARE(altivec_avg_pixels8_xy2_num, 1);
-    register int i;
-    register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
-    register vector unsigned char blockv, temp1, temp2, blocktemp;
-    register vector unsigned short pixelssum1, pixelssum2, temp3;
-
-    register const_vector unsigned char vczero = (const_vector unsigned char)
-                                        vec_splat_u8(0);
-    register const_vector unsigned short vctwo = (const_vector unsigned short)
-                                        vec_splat_u16(2);
-
-    temp1 = vec_ld(0, pixels);
-    temp2 = vec_ld(16, pixels);
-    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
-    if ((((unsigned long)pixels) & 0x0000000F) ==  0x0000000F) {
-        pixelsv2 = temp2;
-    } else {
-        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
-    }
-    pixelsv1 = vec_mergeh(vczero, pixelsv1);
-    pixelsv2 = vec_mergeh(vczero, pixelsv2);
-    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
-                         (vector unsigned short)pixelsv2);
-    pixelssum1 = vec_add(pixelssum1, vctwo);
-
-POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1);
-    for (i = 0; i < h ; i++) {
-        int rightside = ((unsigned long)block & 0x0000000F);
-        blockv = vec_ld(0, block);
-
-        temp1 = vec_ld(line_size, pixels);
-        temp2 = vec_ld(line_size + 16, pixels);
-        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
-        if (((((unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F)
-        {
-            pixelsv2 = temp2;
-        } else {
-            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
-        }
-
-        pixelsv1 = vec_mergeh(vczero, pixelsv1);
-        pixelsv2 = vec_mergeh(vczero, pixelsv2);
-        pixelssum2 = vec_add((vector unsigned short)pixelsv1,
-                             (vector unsigned short)pixelsv2);
-        temp3 = vec_add(pixelssum1, pixelssum2);
-        temp3 = vec_sra(temp3, vctwo);
-        pixelssum1 = vec_add(pixelssum2, vctwo);
-        pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);
-
-        if (rightside) {
-            blocktemp = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
-        } else {
-            blocktemp = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
-        }
-
-        blockv = vec_avg(blocktemp, blockv);
-        vec_st(blockv, 0, block);
-
-        block += line_size;
-        pixels += line_size;
-    }
-
-POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1);
-}
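
A scalar model of the kernel above: half-pel interpolation in both x
and y (the rounded mean of a 2x2 neighbourhood, with the +2 bias carried
forward in pixelssum1), followed by vec_avg's round-up average with the
existing block contents. Sketch only, with illustrative names:

    #include <stdint.h>

    /* Illustrative scalar model of avg_pixels8_xy2 */
    static void avg_pixels8_xy2_ref(uint8_t *block, const uint8_t *pixels,
                                    int line_size, int h)
    {
        int i, j;
        for (i = 0; i < h; i++) {
            for (j = 0; j < 8; j++) {
                int p = (pixels[j] + pixels[j + 1] +
                         pixels[j + line_size] +
                         pixels[j + line_size + 1] + 2) >> 2;
                block[j] = (block[j] + p + 1) >> 1; /* vec_avg rounds up */
            }
            block  += line_size;
            pixels += line_size;
        }
    }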
-
-void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
-{
-    c->pix_abs[0][1] = sad16_x2_altivec;
-    c->pix_abs[0][2] = sad16_y2_altivec;
-    c->pix_abs[0][3] = sad16_xy2_altivec;
-    c->pix_abs[0][0] = sad16_altivec;
-    c->pix_abs[1][0] = sad8_altivec;
-    c->sad[0]= sad16_altivec;
-    c->sad[1]= sad8_altivec;
-    c->pix_norm1 = pix_norm1_altivec;
-    c->sse[1]= sse8_altivec;
-    c->sse[0]= sse16_altivec;
-    c->pix_sum = pix_sum_altivec;
-    c->diff_pixels = diff_pixels_altivec;
-    c->get_pixels = get_pixels_altivec;
-    c->add_bytes= add_bytes_altivec;
-    c->put_pixels_tab[0][0] = put_pixels16_altivec;
-    /* the two functions do the same thing, so use the same code */
-    c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec;
-    c->avg_pixels_tab[0][0] = avg_pixels16_altivec;
-    c->avg_pixels_tab[1][0] = avg_pixels8_altivec;
-    c->avg_pixels_tab[1][3] = avg_pixels8_xy2_altivec;
-    c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec;
-    c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec;
-    c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec;
-    c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec;
-
-    c->hadamard8_diff[0] = hadamard8_diff16_altivec;
-    c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;
-#ifdef CONFIG_VORBIS_DECODER
-    c->vorbis_inverse_coupling = vorbis_inverse_coupling_altivec;
-#endif
-}
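
dsputil_init_altivec() only fills slots in the DSPContext
function-pointer tables; generic code never names the AltiVec symbols
directly, which is what lets the Makefile confine -maltivec to these
objects. A toy version of the pattern (illustrative only, not the real
API):

    #include <stdint.h>
    #include <stdlib.h>

    /* Toy model of the DSPContext dispatch pattern */
    typedef struct {
        int (*sad8)(const uint8_t *a, const uint8_t *b, int stride);
    } ToyDSP;

    static int sad8_c(const uint8_t *a, const uint8_t *b, int stride)
    {
        int i, j, sum = 0;
        for (i = 0; i < 8; i++, a += stride, b += stride)
            for (j = 0; j < 8; j++)
                sum += abs(a[j] - b[j]);
        return sum;
    }

    static void toy_init(ToyDSP *c, int cpu_has_altivec)
    {
        c->sad8 = sad8_c;            /* safe scalar default */
        if (cpu_has_altivec) {
            /* c->sad8 = sad8_altivec;  as dsputil_init_altivec does */
        }
    }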

Modified: trunk/libavcodec/ppc/dsputil_altivec.c
==============================================================================
--- trunk/libavcodec/ppc/dsputil_altivec.c	(original)
+++ trunk/libavcodec/ppc/dsputil_altivec.c	Tue Oct  2 13:39:32 2007
@@ -25,31 +25,7 @@
 #include "gcc_fixes.h"
 
 #include "dsputil_altivec.h"
-
-#ifdef __APPLE__
-#include <sys/sysctl.h>
-#elif __AMIGAOS4__
-#include <exec/exec.h>
-#include <interfaces/exec.h>
-#include <proto/exec.h>
-#else
-#include <signal.h>
-#include <setjmp.h>
-
-static sigjmp_buf jmpbuf;
-static volatile sig_atomic_t canjump = 0;
-
-static void sigill_handler (int sig)
-{
-    if (!canjump) {
-        signal (sig, SIG_DFL);
-        raise (sig);
-    }
-
-    canjump = 0;
-    siglongjmp (jmpbuf, 1);
-}
-#endif /* __APPLE__ */
+#include "util_altivec.h"
 
 int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
 {
@@ -1417,47 +1393,6 @@ POWERPC_PERF_STOP_COUNT(altivec_hadamard
     return score;
 }
 
-int has_altivec(void)
-{
-#ifdef __AMIGAOS4__
-    ULONG result = 0;
-    extern struct ExecIFace *IExec;
-
-    IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE);
-    if (result == VECTORTYPE_ALTIVEC) return 1;
-    return 0;
-#elif __APPLE__
-    int sels[2] = {CTL_HW, HW_VECTORUNIT};
-    int has_vu = 0;
-    size_t len = sizeof(has_vu);
-    int err;
-
-    err = sysctl(sels, 2, &has_vu, &len, NULL, 0);
-
-    if (err == 0) return (has_vu != 0);
-    return 0;
-#else
-/* Do it the brute-force way, borrowed from the libmpeg2 library. */
-    {
-      signal (SIGILL, sigill_handler);
-      if (sigsetjmp (jmpbuf, 1)) {
-        signal (SIGILL, SIG_DFL);
-      } else {
-        canjump = 1;
-
-        asm volatile ("mtspr 256, %0\n\t"
-                      "vand %%v0, %%v0, %%v0"
-                      :
-                      : "r" (-1));
-
-        signal (SIGILL, SIG_DFL);
-        return 1;
-      }
-    }
-    return 0;
-#endif /* __AMIGAOS4__ */
-}
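
This removal is the heart of the commit: the probe moves into the new
check_altivec.c, which the Makefile hunk deliberately builds without
-maltivec. The probe must be safe to execute on CPUs without a vector
unit, so the only vector instruction it may reach is the vand guarded
by the SIGILL handler (mtspr 256 first writes -1 to VRSAVE, marking all
vector registers in use). The intended call pattern is, schematically
(the dsputil_init_ppc glue itself is not part of this hunk):

    /* check_altivec.o: built WITHOUT -maltivec, runs everywhere.
     * *_altivec.o:     built WITH -maltivec, reached only when probed. */
    if (has_altivec())
        dsputil_init_altivec(c, avctx);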
-
 static void vorbis_inverse_coupling_altivec(float *mag, float *ang,
                                             int blocksize)
 {

Modified: trunk/libavcodec/ppc/dsputil_altivec.h
==============================================================================
--- trunk/libavcodec/ppc/dsputil_altivec.h	(original)
+++ trunk/libavcodec/ppc/dsputil_altivec.h	Tue Oct  2 13:39:32 2007
@@ -31,83 +31,4 @@ void put_pixels16_altivec(uint8_t *block
 
 void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h);
 
-// used to build registers permutation vectors (vcprm)
-// the 's' are for words in the _s_econd vector
-#define WORD_0 0x00,0x01,0x02,0x03
-#define WORD_1 0x04,0x05,0x06,0x07
-#define WORD_2 0x08,0x09,0x0a,0x0b
-#define WORD_3 0x0c,0x0d,0x0e,0x0f
-#define WORD_s0 0x10,0x11,0x12,0x13
-#define WORD_s1 0x14,0x15,0x16,0x17
-#define WORD_s2 0x18,0x19,0x1a,0x1b
-#define WORD_s3 0x1c,0x1d,0x1e,0x1f
-
-#ifdef __APPLE_CC__
-#define vcprm(a,b,c,d) (const vector unsigned char)(WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d)
-#else
-#define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d}
-#endif
-
-// vcprmle is used to keep the same index as in the SSE version.
-// It is the same as vcprm, with the indexes reversed
-// ('le' is Little Endian).
-#define vcprmle(a,b,c,d) vcprm(d,c,b,a)
-
-// used to build inverse/identity vectors (vcii)
-// n is _n_egative, p is _p_ositive
-#define FLOAT_n -1.
-#define FLOAT_p 1.
-
-
-#ifdef __APPLE_CC__
-#define vcii(a,b,c,d) (const vector float)(FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d)
-#else
-#define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d}
-#endif
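
An expansion example makes the macros concrete; the values follow
mechanically from the WORD_* and FLOAT_* defines above:

    /* vcprm(0, 1, s0, s1) ==
     *   {0x00,0x01,0x02,0x03, 0x04,0x05,0x06,0x07,
     *    0x10,0x11,0x12,0x13, 0x14,0x15,0x16,0x17}
     * so vec_perm(a, b, vcprm(0, 1, s0, s1)) returns words 0-1 of a
     * followed by words 0-1 of b, the merge used by
     * avg_pixels8_xy2_altivec above.  Likewise vcii(p, p, n, n) is
     * the float vector {1., 1., -1., -1.}. */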
-
-// Transpose 8x8 matrix of 16-bit elements (in-place)
-#define TRANSPOSE8(a,b,c,d,e,f,g,h) \
-do { \
-    vector signed short A1, B1, C1, D1, E1, F1, G1, H1; \
-    vector signed short A2, B2, C2, D2, E2, F2, G2, H2; \
- \
-    A1 = vec_mergeh (a, e); \
-    B1 = vec_mergel (a, e); \
-    C1 = vec_mergeh (b, f); \
-    D1 = vec_mergel (b, f); \
-    E1 = vec_mergeh (c, g); \
-    F1 = vec_mergel (c, g); \
-    G1 = vec_mergeh (d, h); \
-    H1 = vec_mergel (d, h); \
- \
-    A2 = vec_mergeh (A1, E1); \
-    B2 = vec_mergel (A1, E1); \
-    C2 = vec_mergeh (B1, F1); \
-    D2 = vec_mergel (B1, F1); \
-    E2 = vec_mergeh (C1, G1); \
-    F2 = vec_mergel (C1, G1); \
-    G2 = vec_mergeh (D1, H1); \
-    H2 = vec_mergel (D1, H1); \
- \
-    a = vec_mergeh (A2, E2); \
-    b = vec_mergel (A2, E2); \
-    c = vec_mergeh (B2, F2); \
-    d = vec_mergel (B2, F2); \
-    e = vec_mergeh (C2, G2); \
-    f = vec_mergel (C2, G2); \
-    g = vec_mergeh (D2, H2); \
-    h = vec_mergel (D2, H2); \
-} while (0)
-
-
-/** \brief loads unaligned vector \a *src with offset \a offset
-    and returns it */
-static inline vector unsigned char unaligned_load(int offset, uint8_t *src)
-{
-    register vector unsigned char first = vec_ld(offset, src);
-    register vector unsigned char second = vec_ld(offset+15, src);
-    register vector unsigned char mask = vec_lvsl(offset, src);
-    return vec_perm(first, second, mask);
-}
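
vec_ld ignores the low four bits of the address, so an unaligned
16-byte read has to be stitched together from the two aligned blocks it
straddles. Loading the second block at offset+15 instead of offset+16
avoids touching the next block at all when the address is already
aligned. A worked example with a made-up address:

    /* src + offset == 0x1007 (hypothetical):
     *   first  = vec_ld(offset,      src)  -> aligned block at 0x1000
     *   second = vec_ld(offset + 15, src)  -> aligned block at 0x1010
     *   mask   = vec_lvsl(offset, src)     -> {7, 8, ..., 22}
     *   vec_perm(first, second, mask)      -> bytes 0x1007..0x1016 */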
-
 #endif /* DSPUTIL_ALTIVEC_H */

Modified: trunk/libavcodec/ppc/fft_altivec.c
==============================================================================
--- trunk/libavcodec/ppc/fft_altivec.c	(original)
+++ trunk/libavcodec/ppc/fft_altivec.c	Tue Oct  2 13:39:32 2007
@@ -24,8 +24,8 @@
 
 #include "gcc_fixes.h"
 
-#include "dsputil_altivec.h"
-
+#include "dsputil_ppc.h"
+#include "util_altivec.h"
 /*
   those three macros are from libavcodec/fft.c
   and are required for the reference C code

Modified: trunk/libavcodec/ppc/gmc_altivec.c
==============================================================================
--- trunk/libavcodec/ppc/gmc_altivec.c	(original)
+++ trunk/libavcodec/ppc/gmc_altivec.c	Tue Oct  2 13:39:32 2007
@@ -24,7 +24,8 @@
 
 #include "gcc_fixes.h"
 
-#include "dsputil_altivec.h"
+#include "dsputil_ppc.h"
+#include "util_altivec.h"
 
 /*
 altivec-enhanced gmc1. ATM this code assumes stride is a multiple of 8,

Modified: trunk/libavcodec/ppc/h264_altivec.c
==============================================================================
--- trunk/libavcodec/ppc/h264_altivec.c	(original)
+++ trunk/libavcodec/ppc/h264_altivec.c	Tue Oct  2 13:39:32 2007
@@ -22,7 +22,8 @@
 
 #include "gcc_fixes.h"
 
-#include "dsputil_altivec.h"
+#include "dsputil_ppc.h"
+#include "util_altivec.h"
 #include "types_altivec.h"
 
 #define PUT_OP_U8_ALTIVEC(d, s, dst) d = s

Copied: trunk/libavcodec/ppc/imgresample_altivec.c (from r10492, /trunk/libavcodec/imgresample.c)
==============================================================================
--- /trunk/libavcodec/imgresample.c	(original)
+++ trunk/libavcodec/ppc/imgresample_altivec.c	Tue Oct  2 13:39:32 2007
@@ -20,268 +20,12 @@
  */
 
 /**
- * @file imgresample.c
- * High quality image resampling with polyphase filters .
+ * @file imgresample_altivec.c
+ * High quality image resampling with polyphase filters - AltiVec bits
  */
 
-#include "avcodec.h"
-#include "swscale.h"
-#include "dsputil.h"
-
-#define NB_COMPONENTS 3
-
-#define PHASE_BITS 4
-#define NB_PHASES  (1 << PHASE_BITS)
-#define NB_TAPS    4
-#define FCENTER    1  /* index of the center of the filter */
-//#define TEST    1  /* Test it */
-
-#define POS_FRAC_BITS 16
-#define POS_FRAC      (1 << POS_FRAC_BITS)
-/* 6 bits of precision are needed for MMX */
-#define FILTER_BITS   8
-
-#define LINE_BUF_HEIGHT (NB_TAPS * 4)
-
-struct SwsContext {
-    AVClass *av_class;
-    struct ImgReSampleContext *resampling_ctx;
-    enum PixelFormat src_pix_fmt, dst_pix_fmt;
-};
-
-struct ImgReSampleContext {
-    int iwidth, iheight, owidth, oheight;
-    int topBand, bottomBand, leftBand, rightBand;
-    int padtop, padbottom, padleft, padright;
-    int pad_owidth, pad_oheight;
-    int h_incr, v_incr;
-    DECLARE_ALIGNED_8(int16_t, h_filters[NB_PHASES][NB_TAPS]); /* horizontal filters */
-    DECLARE_ALIGNED_8(int16_t, v_filters[NB_PHASES][NB_TAPS]); /* vertical filters */
-    uint8_t *line_buf;
-};
-
-void av_build_filter(int16_t *filter, double factor, int tap_count, int phase_count, int scale, int type);
-
-static inline int get_phase(int pos)
-{
-    return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
-}
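
Source positions are 16.16 fixed point; the integer part selects the
source pixel and the top PHASE_BITS of the fraction select the filter
phase. A worked example with made-up values:

    /* pos == 0x18000 encodes 1.5:
     *   pos >> POS_FRAC_BITS == 1   (source pixel index)
     *   get_phase(0x18000) == (0x18000 >> 12) & 15 == 24 & 15 == 8,
     * i.e. the half-way phase of the NB_PHASES == 16 filter bank. */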
-
-/* This function must be optimized */
-static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
-                            int src_width, int src_start, int src_incr,
-                            int16_t *filters)
-{
-    int src_pos, phase, sum, i;
-    const uint8_t *s;
-    int16_t *filter;
-
-    src_pos = src_start;
-    for(i=0;i<dst_width;i++) {
-#ifdef TEST
-        /* test */
-        if ((src_pos >> POS_FRAC_BITS) < 0 ||
-            (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
-            av_abort();
-#endif
-        s = src + (src_pos >> POS_FRAC_BITS);
-        phase = get_phase(src_pos);
-        filter = filters + phase * NB_TAPS;
-#if NB_TAPS == 4
-        sum = s[0] * filter[0] +
-            s[1] * filter[1] +
-            s[2] * filter[2] +
-            s[3] * filter[3];
-#else
-        {
-            int j;
-            sum = 0;
-            for(j=0;j<NB_TAPS;j++)
-                sum += s[j] * filter[j];
-        }
-#endif
-        sum = sum >> FILTER_BITS;
-        if (sum < 0)
-            sum = 0;
-        else if (sum > 255)
-            sum = 255;
-        dst[0] = sum;
-        src_pos += src_incr;
-        dst++;
-    }
-}
-
-/* This function must be optimized */
-static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
-                       int wrap, int16_t *filter)
-{
-    int sum, i;
-    const uint8_t *s;
-
-    s = src;
-    for(i=0;i<dst_width;i++) {
-#if NB_TAPS == 4
-        sum = s[0 * wrap] * filter[0] +
-            s[1 * wrap] * filter[1] +
-            s[2 * wrap] * filter[2] +
-            s[3 * wrap] * filter[3];
-#else
-        {
-            int j;
-            uint8_t *s1 = s;
-
-            sum = 0;
-            for(j=0;j<NB_TAPS;j++) {
-                sum += s1[0] * filter[j];
-                s1 += wrap;
-            }
-        }
-#endif
-        sum = sum >> FILTER_BITS;
-        if (sum < 0)
-            sum = 0;
-        else if (sum > 255)
-            sum = 255;
-        dst[0] = sum;
-        dst++;
-        s++;
-    }
-}
-
-#ifdef HAVE_MMX
-
-#include "i386/mmx.h"
-
-#define FILTER4(reg) \
-{\
-        s = src + (src_pos >> POS_FRAC_BITS);\
-        phase = get_phase(src_pos);\
-        filter = filters + phase * NB_TAPS;\
-        movq_m2r(*s, reg);\
-        punpcklbw_r2r(mm7, reg);\
-        movq_m2r(*filter, mm6);\
-        pmaddwd_r2r(reg, mm6);\
-        movq_r2r(mm6, reg);\
-        psrlq_i2r(32, reg);\
-        paddd_r2r(mm6, reg);\
-        psrad_i2r(FILTER_BITS, reg);\
-        src_pos += src_incr;\
-}
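
FILTER4 evaluates one 4-tap output per register: punpcklbw with the
zeroed mm7 widens four source bytes to words, pmaddwd multiplies them
by the four filter words and pairwise-adds into two dwords, and the
shift/add sequence folds those into the full dot product:

    /* after pmaddwd:  mm6 == { s0*f0 + s1*f1 , s2*f2 + s3*f3 }
     * movq mm6 -> reg, psrlq 32: low dword of reg = high dword of mm6
     * paddd mm6 -> reg:          low dword of reg = full 4-tap sum
     * psrad FILTER_BITS:         remove the coefficient scaling */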
-
-#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016"PRIx64"\n", tmp.uq);
-
-/* XXX: do four pixels at a time */
-static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
-                                 const uint8_t *src, int src_width,
-                                 int src_start, int src_incr, int16_t *filters)
-{
-    int src_pos, phase;
-    const uint8_t *s;
-    int16_t *filter;
-    mmx_t tmp;
-
-    src_pos = src_start;
-    pxor_r2r(mm7, mm7);
-
-    while (dst_width >= 4) {
-
-        FILTER4(mm0);
-        FILTER4(mm1);
-        FILTER4(mm2);
-        FILTER4(mm3);
-
-        packuswb_r2r(mm7, mm0);
-        packuswb_r2r(mm7, mm1);
-        packuswb_r2r(mm7, mm3);
-        packuswb_r2r(mm7, mm2);
-        movq_r2m(mm0, tmp);
-        dst[0] = tmp.ub[0];
-        movq_r2m(mm1, tmp);
-        dst[1] = tmp.ub[0];
-        movq_r2m(mm2, tmp);
-        dst[2] = tmp.ub[0];
-        movq_r2m(mm3, tmp);
-        dst[3] = tmp.ub[0];
-        dst += 4;
-        dst_width -= 4;
-    }
-    while (dst_width > 0) {
-        FILTER4(mm0);
-        packuswb_r2r(mm7, mm0);
-        movq_r2m(mm0, tmp);
-        dst[0] = tmp.ub[0];
-        dst++;
-        dst_width--;
-    }
-    emms();
-}
-
-static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
-                            int wrap, int16_t *filter)
-{
-    int sum, i, v;
-    const uint8_t *s;
-    mmx_t tmp;
-    mmx_t coefs[4];
-
-    for(i=0;i<4;i++) {
-        v = filter[i];
-        coefs[i].uw[0] = v;
-        coefs[i].uw[1] = v;
-        coefs[i].uw[2] = v;
-        coefs[i].uw[3] = v;
-    }
-
-    pxor_r2r(mm7, mm7);
-    s = src;
-    while (dst_width >= 4) {
-        movq_m2r(s[0 * wrap], mm0);
-        punpcklbw_r2r(mm7, mm0);
-        movq_m2r(s[1 * wrap], mm1);
-        punpcklbw_r2r(mm7, mm1);
-        movq_m2r(s[2 * wrap], mm2);
-        punpcklbw_r2r(mm7, mm2);
-        movq_m2r(s[3 * wrap], mm3);
-        punpcklbw_r2r(mm7, mm3);
-
-        pmullw_m2r(coefs[0], mm0);
-        pmullw_m2r(coefs[1], mm1);
-        pmullw_m2r(coefs[2], mm2);
-        pmullw_m2r(coefs[3], mm3);
-
-        paddw_r2r(mm1, mm0);
-        paddw_r2r(mm3, mm2);
-        paddw_r2r(mm2, mm0);
-        psraw_i2r(FILTER_BITS, mm0);
-
-        packuswb_r2r(mm7, mm0);
-        movq_r2m(mm0, tmp);
-
-        *(uint32_t *)dst = tmp.ud[0];
-        dst += 4;
-        s += 4;
-        dst_width -= 4;
-    }
-    while (dst_width > 0) {
-        sum = s[0 * wrap] * filter[0] +
-            s[1 * wrap] * filter[1] +
-            s[2 * wrap] * filter[2] +
-            s[3 * wrap] * filter[3];
-        sum = sum >> FILTER_BITS;
-        if (sum < 0)
-            sum = 0;
-        else if (sum > 255)
-            sum = 255;
-        dst[0] = sum;
-        dst++;
-        s++;
-        dst_width--;
-    }
-    emms();
-}
-#endif /* HAVE_MMX */
+#include "gcc_fixes.h"
 
-#ifdef HAVE_ALTIVEC
 typedef         union {
     vector unsigned char v;
     unsigned char c[16];
@@ -406,545 +150,4 @@ void v_resample16_altivec(uint8_t *dst, 
         dst_width--;
     }
 }
-#endif /* HAVE_ALTIVEC */
-
-/* slow version to handle limit cases. Does not need optimisation */
-static void h_resample_slow(uint8_t *dst, int dst_width,
-                            const uint8_t *src, int src_width,
-                            int src_start, int src_incr, int16_t *filters)
-{
-    int src_pos, phase, sum, j, v, i;
-    const uint8_t *s, *src_end;
-    int16_t *filter;
-
-    src_end = src + src_width;
-    src_pos = src_start;
-    for(i=0;i<dst_width;i++) {
-        s = src + (src_pos >> POS_FRAC_BITS);
-        phase = get_phase(src_pos);
-        filter = filters + phase * NB_TAPS;
-        sum = 0;
-        for(j=0;j<NB_TAPS;j++) {
-            if (s < src)
-                v = src[0];
-            else if (s >= src_end)
-                v = src_end[-1];
-            else
-                v = s[0];
-            sum += v * filter[j];
-            s++;
-        }
-        sum = sum >> FILTER_BITS;
-        if (sum < 0)
-            sum = 0;
-        else if (sum > 255)
-            sum = 255;
-        dst[0] = sum;
-        src_pos += src_incr;
-        dst++;
-    }
-}
-
-static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
-                       int src_width, int src_start, int src_incr,
-                       int16_t *filters)
-{
-    int n, src_end;
-
-    if (src_start < 0) {
-        n = (0 - src_start + src_incr - 1) / src_incr;
-        h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
-        dst += n;
-        dst_width -= n;
-        src_start += n * src_incr;
-    }
-    src_end = src_start + dst_width * src_incr;
-    if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
-        n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
-            src_incr;
-    } else {
-        n = dst_width;
-    }
-#ifdef HAVE_MMX
-    if ((mm_flags & MM_MMX) && NB_TAPS == 4)
-        h_resample_fast4_mmx(dst, n,
-                             src, src_width, src_start, src_incr, filters);
-    else
-#endif
-        h_resample_fast(dst, n,
-                        src, src_width, src_start, src_incr, filters);
-    if (n < dst_width) {
-        dst += n;
-        dst_width -= n;
-        src_start += n * src_incr;
-        h_resample_slow(dst, dst_width,
-                        src, src_width, src_start, src_incr, filters);
-    }
-}
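
The dispatcher splits each output row into at most three runs: a slow
prefix while the filter window starts left of the image, a fast middle,
and a slow suffix once the 4-tap window would read past src_width. With
made-up numbers:

    /* src_start == -0x8000 (-0.5 in 16.16), src_incr == 0x18000 (1.5):
     *   n = (0x8000 + 0x18000 - 1) / 0x18000 == 1
     * so one left-edge pixel goes through h_resample_slow (which clamps
     * reads to src[0]), the bulk through h_resample_fast or the MMX
     * variant, and any right-edge pixels fall back to the slow path. */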
-
-static void component_resample(ImgReSampleContext *s,
-                               uint8_t *output, int owrap, int owidth, int oheight,
-                               uint8_t *input, int iwrap, int iwidth, int iheight)
-{
-    int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
-    uint8_t *new_line, *src_line;
-
-    last_src_y = - FCENTER - 1;
-    /* position of the bottom of the filter in the source image */
-    src_y = (last_src_y + NB_TAPS) * POS_FRAC;
-    ring_y = NB_TAPS; /* position in ring buffer */
-    for(y=0;y<oheight;y++) {
-        /* apply horizontal filter on new lines from input if needed */
-        src_y1 = src_y >> POS_FRAC_BITS;
-        while (last_src_y < src_y1) {
-            if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
-                ring_y = NB_TAPS;
-            last_src_y++;
-            /* handle limit conditions: replicate the line (slightly
-               inefficient because we filter it multiple times) */
-            y1 = last_src_y;
-            if (y1 < 0) {
-                y1 = 0;
-            } else if (y1 >= iheight) {
-                y1 = iheight - 1;
-            }
-            src_line = input + y1 * iwrap;
-            new_line = s->line_buf + ring_y * owidth;
-            /* apply filter and handle limit cases correctly */
-            h_resample(new_line, owidth,
-                       src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
-                       &s->h_filters[0][0]);
-            /* handle ring buffer wrapping */
-            if (ring_y >= LINE_BUF_HEIGHT) {
-                memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
-                       new_line, owidth);
-            }
-        }
-        /* apply vertical filter */
-        phase_y = get_phase(src_y);
-#ifdef HAVE_MMX
-        /* MMX deactivated because of loss of precision */
-        if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
-            v_resample4_mmx(output, owidth,
-                            s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
-                            &s->v_filters[phase_y][0]);
-        else
-#endif
-#ifdef HAVE_ALTIVEC
-            if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
-                v_resample16_altivec(output, owidth,
-                                s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
-                                &s->v_filters[phase_y][0]);
-        else
-#endif
-            v_resample(output, owidth,
-                       s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
-                       &s->v_filters[phase_y][0]);
-
-        src_y += s->v_incr;
-
-        output += owrap;
-    }
-}
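
The line buffer is a ring of LINE_BUF_HEIGHT slots plus NB_TAPS spares
at the front; mirroring the lines written near the end back to the
start keeps every vertical filter window contiguous:

    /* with NB_TAPS == 4 and LINE_BUF_HEIGHT == 16: slots 4..19 receive
     * fresh lines, and a line stored at slot 16..19 is also copied to
     * slot 0..3, so the window (ring_y - 3 .. ring_y) stays contiguous
     * even right after ring_y wraps back to 4. */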
 
-ImgReSampleContext *img_resample_init(int owidth, int oheight,
-                                      int iwidth, int iheight)
-{
-    return img_resample_full_init(owidth, oheight, iwidth, iheight,
-            0, 0, 0, 0, 0, 0, 0, 0);
-}
-
-ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
-                                      int iwidth, int iheight,
-                                      int topBand, int bottomBand,
-        int leftBand, int rightBand,
-        int padtop, int padbottom,
-        int padleft, int padright)
-{
-    ImgReSampleContext *s;
-
-    if (!owidth || !oheight || !iwidth || !iheight)
-        return NULL;
-
-    s = av_mallocz(sizeof(ImgReSampleContext));
-    if (!s)
-        return NULL;
-    if((unsigned)owidth >= UINT_MAX / (LINE_BUF_HEIGHT + NB_TAPS))
-        goto fail;
-    s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
-    if (!s->line_buf)
-        goto fail;
-
-    s->owidth = owidth;
-    s->oheight = oheight;
-    s->iwidth = iwidth;
-    s->iheight = iheight;
-
-    s->topBand = topBand;
-    s->bottomBand = bottomBand;
-    s->leftBand = leftBand;
-    s->rightBand = rightBand;
-
-    s->padtop = padtop;
-    s->padbottom = padbottom;
-    s->padleft = padleft;
-    s->padright = padright;
-
-    s->pad_owidth = owidth - (padleft + padright);
-    s->pad_oheight = oheight - (padtop + padbottom);
-
-    s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth;
-    s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight;
-
-    av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth  /
-            (float) (iwidth - leftBand - rightBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
-    av_build_filter(&s->v_filters[0][0], (float) s->pad_oheight /
-            (float) (iheight - topBand - bottomBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
-
-    return s;
-fail:
-    av_free(s);
-    return NULL;
-}
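
The increments are plain size ratios in 16.16 fixed point. For example
(hypothetical sizes), halving a 720-pixel row to 360 output pixels with
no bands or padding gives:

    /* h_incr == ((720 - 0 - 0) * POS_FRAC) / 360 == 0x20000, i.e. 2.0
     * in 16.16: the source position advances two input pixels per
     * output pixel, while av_build_filter receives the inverse factor
     * 360.0 / 720.0 == 0.5 to widen the low-pass kernel to match. */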
-
-void img_resample(ImgReSampleContext *s,
-                  AVPicture *output, const AVPicture *input)
-{
-    int i, shift;
-    uint8_t* optr;
-
-    for (i=0;i<3;i++) {
-        shift = (i == 0) ? 0 : 1;
-
-        optr = output->data[i] + (((output->linesize[i] *
-                        s->padtop) + s->padleft) >> shift);
-
-        component_resample(s, optr, output->linesize[i],
-                s->pad_owidth >> shift, s->pad_oheight >> shift,
-                input->data[i] + (input->linesize[i] *
-                    (s->topBand >> shift)) + (s->leftBand >> shift),
-                input->linesize[i], ((s->iwidth - s->leftBand -
-                        s->rightBand) >> shift),
-                           (s->iheight - s->topBand - s->bottomBand) >> shift);
-    }
-}
-
-void img_resample_close(ImgReSampleContext *s)
-{
-    av_free(s->line_buf);
-    av_free(s);
-}
-
-struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat,
-                                  int dstW, int dstH, int dstFormat,
-                                  int flags, SwsFilter *srcFilter,
-                                  SwsFilter *dstFilter, double *param)
-{
-    struct SwsContext *ctx;
-
-    ctx = av_malloc(sizeof(struct SwsContext));
-    if (ctx)
-        ctx->av_class = av_mallocz(sizeof(AVClass));
-    if (!ctx || !ctx->av_class) {
-        av_log(NULL, AV_LOG_ERROR, "Cannot allocate a resampling context!\n");
-
-        return NULL;
-    }
-
-    if ((srcH != dstH) || (srcW != dstW)) {
-        if ((srcFormat != PIX_FMT_YUV420P) || (dstFormat != PIX_FMT_YUV420P)) {
-            av_log(NULL, AV_LOG_INFO, "PIX_FMT_YUV420P will be used as an intermediate format for rescaling\n");
-        }
-        ctx->resampling_ctx = img_resample_init(dstW, dstH, srcW, srcH);
-    } else {
-        ctx->resampling_ctx = av_malloc(sizeof(ImgReSampleContext));
-        ctx->resampling_ctx->iheight = srcH;
-        ctx->resampling_ctx->iwidth = srcW;
-        ctx->resampling_ctx->oheight = dstH;
-        ctx->resampling_ctx->owidth = dstW;
-    }
-    ctx->src_pix_fmt = srcFormat;
-    ctx->dst_pix_fmt = dstFormat;
-
-    return ctx;
-}
-
-void sws_freeContext(struct SwsContext *ctx)
-{
-    if (!ctx)
-        return;
-    if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
-        (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
-        img_resample_close(ctx->resampling_ctx);
-    } else {
-        av_free(ctx->resampling_ctx);
-    }
-    av_free(ctx->av_class);
-    av_free(ctx);
-}
-
-
-/**
- * Checks if the context is valid, or reallocates a new one instead.
- * If context is NULL, just calls sws_getContext() to get a new one.
- * Otherwise, checks if the parameters match those already saved in context.
- * If that is the case, returns the current context.
- * Otherwise, frees the context and gets a new one.
- *
- * Be warned that srcFilter and dstFilter are not checked; they are
- * assumed to remain valid.
- */
-struct SwsContext *sws_getCachedContext(struct SwsContext *ctx,
-                        int srcW, int srcH, int srcFormat,
-                        int dstW, int dstH, int dstFormat, int flags,
-                        SwsFilter *srcFilter, SwsFilter *dstFilter, double *param)
-{
-    if (ctx != NULL) {
-        if ((ctx->resampling_ctx->iwidth != srcW) ||
-                        (ctx->resampling_ctx->iheight != srcH) ||
-                        (ctx->src_pix_fmt != srcFormat) ||
-                        (ctx->resampling_ctx->owidth != dstW) ||
-                        (ctx->resampling_ctx->oheight != dstH) ||
-                        (ctx->dst_pix_fmt != dstFormat))
-        {
-            sws_freeContext(ctx);
-            ctx = NULL;
-        }
-    }
-    if (ctx == NULL) {
-        return sws_getContext(srcW, srcH, srcFormat,
-                        dstW, dstH, dstFormat, flags,
-                        srcFilter, dstFilter, param);
-    }
-    return ctx;
-}
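
Typical use is a per-frame loop where the geometry rarely changes, so
the context survives across calls. A sketch (hypothetical caller, error
handling omitted):

    static struct SwsContext *sws; /* reused across frames */

    static void rescale_frame(uint8_t *src[], int srcStride[], int w, int h,
                              uint8_t *dst[], int dstStride[], int ow, int oh)
    {
        sws = sws_getCachedContext(sws, w, h, PIX_FMT_YUV420P,
                                   ow, oh, PIX_FMT_YUV420P,
                                   0, NULL, NULL, NULL);
        if (sws)
            sws_scale(sws, src, srcStride, 0, h, dst, dstStride);
    }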
-
-int sws_scale(struct SwsContext *ctx, uint8_t* src[], int srcStride[],
-              int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
-{
-    AVPicture src_pict, dst_pict;
-    int i, res = 0;
-    AVPicture picture_format_temp;
-    AVPicture picture_resample_temp, *formatted_picture, *resampled_picture;
-    uint8_t *buf1 = NULL, *buf2 = NULL;
-    enum PixelFormat current_pix_fmt;
-
-    for (i = 0; i < 4; i++) {
-        src_pict.data[i] = src[i];
-        src_pict.linesize[i] = srcStride[i];
-        dst_pict.data[i] = dst[i];
-        dst_pict.linesize[i] = dstStride[i];
-    }
-    if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
-        (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
-        /* We have to rescale the picture, but only YUV420P rescaling is supported... */
-
-        if (ctx->src_pix_fmt != PIX_FMT_YUV420P) {
-            int size;
-
-            /* create temporary picture for rescaling input */
-            size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
-            buf1 = av_malloc(size);
-            if (!buf1) {
-                res = -1;
-                goto the_end;
-            }
-            formatted_picture = &picture_format_temp;
-            avpicture_fill((AVPicture*)formatted_picture, buf1,
-                           PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
-
-            if (img_convert((AVPicture*)formatted_picture, PIX_FMT_YUV420P,
-                            &src_pict, ctx->src_pix_fmt,
-                            ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight) < 0) {
-
-                av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
-                res = -1;
-                goto the_end;
-            }
-        } else {
-            formatted_picture = &src_pict;
-        }
-
-        if (ctx->dst_pix_fmt != PIX_FMT_YUV420P) {
-            int size;
-
-            /* create temporary picture for rescaling output */
-            size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
-            buf2 = av_malloc(size);
-            if (!buf2) {
-                res = -1;
-                goto the_end;
-            }
-            resampled_picture = &picture_resample_temp;
-            avpicture_fill((AVPicture*)resampled_picture, buf2,
-                           PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
-
-        } else {
-            resampled_picture = &dst_pict;
-        }
-
-        /* ...and finally rescale!!! */
-        img_resample(ctx->resampling_ctx, resampled_picture, formatted_picture);
-        current_pix_fmt = PIX_FMT_YUV420P;
-    } else {
-        resampled_picture = &src_pict;
-        current_pix_fmt = ctx->src_pix_fmt;
-    }
-
-    if (current_pix_fmt != ctx->dst_pix_fmt) {
-        if (img_convert(&dst_pict, ctx->dst_pix_fmt,
-                        resampled_picture, current_pix_fmt,
-                        ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight) < 0) {
-
-            av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
-
-            res = -1;
-            goto the_end;
-        }
-    } else if (resampled_picture != &dst_pict) {
-        av_picture_copy(&dst_pict, resampled_picture, current_pix_fmt,
-                        ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
-    }
-
-the_end:
-    av_free(buf1);
-    av_free(buf2);
-    return res;
-}
-
-
-#ifdef TEST
-#include <stdio.h>
-#undef exit
-
-/* input */
-#define XSIZE 256
-#define YSIZE 256
-uint8_t img[XSIZE * YSIZE];
-
-/* output */
-#define XSIZE1 512
-#define YSIZE1 512
-uint8_t img1[XSIZE1 * YSIZE1];
-uint8_t img2[XSIZE1 * YSIZE1];
-
-void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
-{
-#undef fprintf
-    FILE *f;
-    f=fopen(filename,"w");
-    fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
-    fwrite(img,1, xsize * ysize,f);
-    fclose(f);
-#define fprintf please_use_av_log
-}
-
-static void dump_filter(int16_t *filter)
-{
-    int i, ph;
-
-    for(ph=0;ph<NB_PHASES;ph++) {
-        av_log(NULL, AV_LOG_INFO, "%2d: ", ph);
-        for(i=0;i<NB_TAPS;i++) {
-            av_log(NULL, AV_LOG_INFO, " %5.2f", filter[ph * NB_TAPS + i] / 256.0);
-        }
-        av_log(NULL, AV_LOG_INFO, "\n");
-    }
-}
-
-#ifdef HAVE_MMX
-int mm_flags;
-#endif
-
-int main(int argc, char **argv)
-{
-    int x, y, v, i, xsize, ysize;
-    ImgReSampleContext *s;
-    float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
-    char buf[256];
-
-    /* build test image */
-    for(y=0;y<YSIZE;y++) {
-        for(x=0;x<XSIZE;x++) {
-            if (x < XSIZE/2 && y < YSIZE/2) {
-                if (x < XSIZE/4 && y < YSIZE/4) {
-                    if ((x % 10) <= 6 &&
-                        (y % 10) <= 6)
-                        v = 0xff;
-                    else
-                        v = 0x00;
-                } else if (x < XSIZE/4) {
-                    if (x & 1)
-                        v = 0xff;
-                    else
-                        v = 0;
-                } else if (y < XSIZE/4) {
-                    if (y & 1)
-                        v = 0xff;
-                    else
-                        v = 0;
-                } else {
-                    if (y < YSIZE*3/8) {
-                        if ((y+x) & 1)
-                            v = 0xff;
-                        else
-                            v = 0;
-                    } else {
-                        if (((x+3) % 4) <= 1 &&
-                            ((y+3) % 4) <= 1)
-                            v = 0xff;
-                        else
-                            v = 0x00;
-                    }
-                }
-            } else if (x < XSIZE/2) {
-                v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
-            } else if (y < XSIZE/2) {
-                v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
-            } else {
-                v = ((x + y - XSIZE) * 255) / XSIZE;
-            }
-            img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v;
-        }
-    }
-    save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
-    for(i=0;i<sizeof(factors)/sizeof(float);i++) {
-        fact = factors[i];
-        xsize = (int)(XSIZE * fact);
-        ysize = (int)((YSIZE - 100) * fact);
-        s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0, 0, 0, 0, 0);
-        av_log(NULL, AV_LOG_INFO, "Factor=%0.2f\n", fact);
-        dump_filter(&s->h_filters[0][0]);
-        component_resample(s, img1, xsize, xsize, ysize,
-                           img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
-        img_resample_close(s);
-
-        snprintf(buf, sizeof(buf), "/tmp/out%d.pgm", i);
-        save_pgm(buf, img1, xsize, ysize);
-    }
-
-    /* mmx test */
-#ifdef HAVE_MMX
-    av_log(NULL, AV_LOG_INFO, "MMX test\n");
-    fact = 0.72;
-    xsize = (int)(XSIZE * fact);
-    ysize = (int)(YSIZE * fact);
-    mm_flags = MM_MMX;
-    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
-    component_resample(s, img1, xsize, xsize, ysize,
-                       img, XSIZE, XSIZE, YSIZE);
-
-    mm_flags = 0;
-    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
-    component_resample(s, img2, xsize, xsize, ysize,
-                       img, XSIZE, XSIZE, YSIZE);
-    if (memcmp(img1, img2, xsize * ysize) != 0) {
-        av_log(NULL, AV_LOG_ERROR, "mmx error\n");
-        exit(1);
-    }
-    av_log(NULL, AV_LOG_INFO, "MMX OK\n");
-#endif /* HAVE_MMX */
-    return 0;
-}
-
-#endif /* TEST */

Added: trunk/libavcodec/ppc/imgresample_altivec.h
==============================================================================
--- (empty file)
+++ trunk/libavcodec/ppc/imgresample_altivec.h	Tue Oct  2 13:39:32 2007
@@ -0,0 +1,24 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef IMGRESAMPLE_ALTIVEC_H
+#define IMGRESAMPLE_ALTIVEC_H
+
+void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
+                          int wrap, int16_t *filter);
+#endif /* IMGRESAMPLE_ALTIVEC_H */

Modified: trunk/libavcodec/ppc/mpegvideo_altivec.c
==============================================================================
--- trunk/libavcodec/ppc/mpegvideo_altivec.c	(original)
+++ trunk/libavcodec/ppc/mpegvideo_altivec.c	Tue Oct  2 13:39:32 2007
@@ -28,8 +28,8 @@
 
 #include "gcc_fixes.h"
 
-#include "dsputil_altivec.h"
-
+#include "dsputil_ppc.h"
+#include "util_altivec.h"
 // Swaps two variables (used for altivec registers)
 #define SWAP(a,b) \
 do { \

Copied: trunk/libavcodec/ppc/util_altivec.h (from r10492, /trunk/libavcodec/ppc/dsputil_altivec.h)
==============================================================================
--- /trunk/libavcodec/ppc/dsputil_altivec.h	(original)
+++ trunk/libavcodec/ppc/util_altivec.h	Tue Oct  2 13:39:32 2007
@@ -1,8 +1,4 @@
 /*
- * Copyright (c) 2002 Brian Foley
- * Copyright (c) 2002 Dieter Shirley
- * Copyright (c) 2003-2004 Romain Dolbeau <romain at dolbeau.org>
- *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -20,16 +16,13 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef DSPUTIL_ALTIVEC_H
-#define DSPUTIL_ALTIVEC_H
-
-#include "dsputil_ppc.h"
-
-extern int has_altivec(void);
-
-void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h);
+/**
+ * @file util_altivec.h
+ * Contains misc utility macros and inline functions
+ */
 
-void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h);
+#ifndef UTIL_ALTIVEC_H
+#define UTIL_ALTIVEC_H
 
 // used to build registers permutation vectors (vcprm)
 // the 's' are for words in the _s_econd vector
@@ -110,4 +103,4 @@ static inline vector unsigned char unali
     return vec_perm(first, second, mask);
 }
 
-#endif /* DSPUTIL_ALTIVEC_H */
+#endif /* UTIL_ALTIVEC_H */

Modified: trunk/libavcodec/ppc/vc1dsp_altivec.c
==============================================================================
--- trunk/libavcodec/ppc/vc1dsp_altivec.c	(original)
+++ trunk/libavcodec/ppc/vc1dsp_altivec.c	Tue Oct  2 13:39:32 2007
@@ -23,7 +23,7 @@
 
 #include "gcc_fixes.h"
 
-#include "dsputil_altivec.h"
+#include "util_altivec.h"
 
 // main steps of 8x8 transform
 #define STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_rnd) \



