00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include "libavutil/mem.h"
00024 #include "libavutil/ppc/types_altivec.h"
00025 #include "libavutil/ppc/util_altivec.h"
00026 #include "libavcodec/dsputil.h"
00027 #include "dsputil_altivec.h"
00028
00029
00030
00031
00032
/* AltiVec (PPC/VMX) GMC (global motion compensation) kernel with a single
 * motion vector, producing an 8-pixel-wide block of height h.
 * Each output pixel is a bilinear blend of four neighbouring source pixels,
 * weighted by x16/y16 — presumably fractional offsets in 1/16-pel units,
 * TODO confirm against callers.
 *
 * dst     : destination pixels; the merge-back logic below assumes dst sits
 *           entirely within one 8-byte half of a 16-byte line, i.e. dst is
 *           at least 8-byte aligned — NOTE(review): confirm with callers.
 * src     : source pixels (may be arbitrarily misaligned; handled below)
 * stride  : byte distance between rows, same for src and dst
 * h       : number of rows to produce
 * x16,y16 : horizontal/vertical blend factors, each expected in [0,16]
 * rounder : rounding constant added before the final >> 8
 */
void ff_gmc1_altivec(uint8_t *dst , uint8_t *src , int stride, int h, int x16, int y16, int rounder)
{
    /* Copy of rounder, 16-byte aligned so vec_lde() below reliably places
     * it in element 0 of the loaded vector. */
    const DECLARE_ALIGNED(16, unsigned short, rounder_a) = rounder;
    /* Bilinear weights of the four neighbouring source pixels.  The four
     * weights always sum to 16*16 = 256, which matches the final
     * vec_sr(..., 8) normalisation at the bottom of the loop. */
    const DECLARE_ALIGNED(16, unsigned short, ABCD)[8] =
    {
        (16-x16)*(16-y16), /* A: top-left weight */
        ( x16)*(16-y16),   /* B: top-right weight */
        (16-x16)*( y16),   /* C: bottom-left weight */
        ( x16)*( y16),     /* D: bottom-right weight */
        0, 0, 0, 0
    };
    register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
    register const vector unsigned short vcsr8 = (const vector unsigned short)vec_splat_u16(8);
    register vector unsigned char dstv, dstv2, src_0, src_1, srcvA, srcvB, srcvC, srcvD;
    register vector unsigned short Av, Bv, Cv, Dv, rounderV, tempA, tempB, tempC, tempD;
    int i;
    unsigned long dst_odd = (unsigned long)dst & 0x0000000F;
    unsigned long src_really_odd = (unsigned long)src & 0x0000000F;

    /* Load the weight table once and broadcast each of the four weights
     * across a full u16 vector. */
    tempA = vec_ld(0, (const unsigned short*)ABCD);
    Av = vec_splat(tempA, 0);
    Bv = vec_splat(tempA, 1);
    Cv = vec_splat(tempA, 2);
    Dv = vec_splat(tempA, 3);

    /* rounder_a is 16-byte aligned, so vec_lde() deposits it in lane 0;
     * splat lane 0 to all eight lanes. */
    rounderV = vec_splat((vec_u16)vec_lde(0, &rounder_a), 0);

    /* Load the first source row pair (src and src+1).  We need 9 bytes
     * starting at an arbitrarily aligned src, so load 32 bytes and extract
     * the wanted spans with vec_perm/vec_lvsl.  This is done once outside
     * the loop because each iteration's bottom row (src+stride) is reused
     * as the next iteration's top row (see srcvA = srcvC below). */
    src_0 = vec_ld(0, src);
    src_1 = vec_ld(16, src);
    srcvA = vec_perm(src_0, src_1, vec_lvsl(0, src));

    if (src_really_odd != 0x0000000F) {
        /* Common case: the bytes at src+1 also live inside the same 32
         * loaded bytes; extract them with a one-byte-shifted permute. */
        srcvB = vec_perm(src_0, src_1, vec_lvsl(1, src));
    } else {
        /* If src & 0xF == 0xF then src+1 is exactly 16-byte aligned and
         * the second load IS the src+1 data; vec_lvsl(1, src) would wrap
         * to a zero shift and select the wrong bytes. */
        srcvB = src_1;
    }
    /* Zero-extend the low 8 bytes of each row to unsigned 16-bit lanes
     * (merge-high with a zero vector). */
    srcvA = vec_mergeh(vczero, srcvA);
    srcvB = vec_mergeh(vczero, srcvB);

    for(i=0; i<h; i++) {
        dst_odd = (unsigned long)dst & 0x0000000F;
        src_really_odd = (((unsigned long)src) + stride) & 0x0000000F;

        /* Read the current dst vector so the 8 result bytes can be merged
         * back without clobbering the neighbouring 8 bytes. */
        dstv = vec_ld(0, dst);

        /* Load the bottom source row pair (src+stride and src+stride+1),
         * using the same unaligned-extraction scheme as above. */
        src_0 = vec_ld(stride + 0, src);
        src_1 = vec_ld(stride + 16, src);
        srcvC = vec_perm(src_0, src_1, vec_lvsl(stride + 0, src));

        if (src_really_odd != 0x0000000F) {
            /* Common case: shifted permute from the same 32 bytes. */
            srcvD = vec_perm(src_0, src_1, vec_lvsl(stride + 1, src));
        } else {
            /* (src + stride + 1) is 16-byte aligned: the second load is
             * already exactly the wanted data. */
            srcvD = src_1;
        }

        srcvC = vec_mergeh(vczero, srcvC);
        srcvD = vec_mergeh(vczero, srcvD);

        /* Bilinear blend, accumulated through chained multiply-adds:
         *   A*topleft + B*topright + C*bottomleft + D*bottomright + rounder
         * All products fit in u16 since weights sum to 256 and pixels are
         * 8-bit. */
        tempA = vec_mladd((vector unsigned short)srcvA, Av, rounderV);
        tempB = vec_mladd((vector unsigned short)srcvB, Bv, tempA);
        tempC = vec_mladd((vector unsigned short)srcvC, Cv, tempB);
        tempD = vec_mladd((vector unsigned short)srcvD, Dv, tempC);

        /* This iteration's bottom row becomes the next iteration's top
         * row, saving one row load + permute per line. */
        srcvA = srcvC;
        srcvB = srcvD;

        /* Normalise: weights sum to 256, so shift right by 8. */
        tempD = vec_sr(tempD, vcsr8);

        /* Pack the eight 16-bit results into the low 8 bytes (upper half
         * packed from the zero vector). */
        dstv2 = vec_pack(tempD, (vector unsigned short)vczero);

        /* Merge the 8 result bytes into the appropriate half of the dst
         * vector: low half when dst & 0xF is nonzero, high half otherwise.
         * Assumes dst & 0xF is either 0 or 8 — TODO confirm with callers. */
        if (dst_odd) {
            dstv2 = vec_perm(dstv, dstv2, vcprm(0,1,s0,s1));
        } else {
            dstv2 = vec_perm(dstv, dstv2, vcprm(s0,s1,2,3));
        }

        vec_st(dstv2, 0, dst);

        dst += stride;
        src += stride;
    }
}