00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include "libavutil/ppc/util_altivec.h"
00022 #include "libavcodec/dsputil.h"
00023
00024 #include "dsputil_altivec.h"
00025
/**
 * Multiply src0 by src1 read back-to-front:
 *     dst[k] = src0[k] * src1[len - 1 - k]
 *
 * Eight floats are produced per loop iteration; a trailing remainder of
 * len % 8 elements is not processed.  All accesses go through
 * vec_ld/vec_st, which operate on 16-byte aligned addresses, so callers
 * are presumably expected to pass aligned buffers -- confirm with callers.
 *
 * @param dst  output buffer
 * @param src0 first factor, read front to back
 * @param src1 second factor, read back to front
 * @param len  number of floats
 */
static void vector_fmul_reverse_altivec(float *dst, const float *src0,
                                        const float *src1, int len)
{
    const vector float zero = (vector float)vec_splat_u32(0);
    /* Address of the last group of four floats in src1. */
    const float *src1_tail = src1 + (len - 4);
    int pos;

    for (pos = 0; pos < len - 7; pos += 8) {
        const float *fwd = src0 + pos;
        const float *bwd = src1_tail - pos;
        float       *out = dst + pos;

        /* Every load is issued before the first store, as in the
         * straight-line original. */
        vector float tail0 = vec_ld(0, bwd);
        vector float head0 = vec_ld(0, fwd);
        vector float tail1 = vec_ld(-16, bwd);
        vector float head1 = vec_ld(16, fwd);

        /* Lane reversal built from merges only.  For tail = {a,b,c,d}:
         *   mergel(tail,tail) = {c,c,d,d}, mergeh(tail,tail) = {a,a,b,b}
         *   mergel(lo,hi)     = {d,b,d,b}, mergeh(lo,hi)     = {c,a,c,a}
         *   mergeh of those   = {d,c,b,a}
         */
        vector float lo0  = vec_mergel(tail0, tail0);
        vector float hi0  = vec_mergeh(tail0, tail0);
        vector float rev0 = vec_mergeh(vec_mergel(lo0, hi0),
                                       vec_mergeh(lo0, hi0));

        vector float lo1  = vec_mergel(tail1, tail1);
        vector float hi1  = vec_mergeh(tail1, tail1);
        vector float rev1 = vec_mergeh(vec_mergel(lo1, hi1),
                                       vec_mergeh(lo1, hi1));

        /* Plain multiply expressed as multiply-add with a zero addend. */
        vec_st(vec_madd(head0, rev0, zero),  0, out);
        vec_st(vec_madd(head1, rev1, zero), 16, out);
    }
}
00053
/**
 * Element-wise multiply-add:
 *     dst[k] = src0[k] * src1[k] + src2[k]
 *
 * Four floats are produced per loop iteration; a trailing remainder of
 * len % 4 elements is not processed.  The sources are read with plain
 * vec_ld (16-byte aligned access), while dst is written with a
 * read-modify-write sequence that also works when dst is not 16-byte
 * aligned.
 *
 * @param dst  output buffer (may be unaligned)
 * @param src0 first factor
 * @param src1 second factor
 * @param src2 addend
 * @param len  number of floats
 */
static void vector_fmul_add_altivec(float *dst, const float *src0,
                                    const float *src1, const float *src2,
                                    int len)
{
    /* Permute controls derived once from the alignment of dst. */
    const vector unsigned char shift_right = vec_lvsr(0, dst);
    const vector unsigned char shift_left  = vec_lvsl(0, dst);
    int pos;

    for (pos = 0; pos < len - 3; pos += 4) {
        float *out = dst + pos;

        /* The two aligned 16-byte blocks that the four outputs touch. */
        vector float blk_lo = vec_ld(0, out);
        vector float blk_hi = vec_ld(15, out);

        vector float a = vec_ld(0, src0 + pos);
        vector float b = vec_ld(0, src1 + pos);
        vector float c = vec_ld(0, src2 + pos);

        /* Bytes around the destination window that must be preserved. */
        vector float keep = vec_perm(blk_hi, blk_lo, shift_left);
        vector float res  = vec_madd(a, b, c);

        /* The upper block is stored first on purpose: when dst is
         * aligned both stores hit the same block and the offset-0
         * store, which then carries the result, must land last. */
        vec_st(vec_perm(res, keep, shift_right), 15, out);
        vec_st(vec_perm(keep, res, shift_right),  0, out);
    }
}
00077
/**
 * Windowed combination of two blocks.  With dst and win indexed from
 * their midpoints (dst + len, win + len) and src0 indexed from its end
 * (src0 + len), for i = -len .. -1 and j = len-1 .. 0:
 *     dst[i] = src0[i] * win[j] - src1[j] * win[i]
 *     dst[j] = src0[i] * win[i] + src1[j] * win[j]
 *
 * Four floats are handled at each end per iteration.  All accesses use
 * vec_ld/vec_st (16-byte aligned access), so the buffers are presumably
 * expected to be aligned and len a multiple of 4 -- confirm with callers.
 *
 * @param dst  output, 2*len floats
 * @param src0 first input, len floats
 * @param src1 second input, len floats
 * @param win  window, 2*len floats
 * @param len  half the window length
 */
static void vector_fmul_window_altivec(float *dst, const float *src0, const float *src1, const float *win, int len)
{
    /* Permute control that reverses the four 32-bit lanes. */
    const vector unsigned char flip = vcprm(3,2,1,0);
    const vector float zero = (vector float)vec_splat_u32(0);

    float       *dst_mid  = dst  + len;
    const float *win_mid  = win  + len;
    const float *src0_end = src0 + len;

    /* Byte offsets: up walks from -4*len towards 0, down walks from
     * the last vector of the upper half back towards offset 0. */
    int up   = -len*4;
    int down = len*4-16;

    for (; up < 0; up += 16, down -= 16) {
        vector float a      = vec_ld(up,   src0_end);
        vector float b_raw  = vec_ld(down, src1);
        vector float w_up   = vec_ld(up,   win_mid);
        vector float w_raw  = vec_ld(down, win_mid);

        /* Bring the descending operands into ascending lane order. */
        vector float b      = vec_perm(b_raw, b_raw, flip);
        vector float w_down = vec_perm(w_raw, w_raw, flip);

        /* vec_nmsub(x, y, z) yields z - x*y. */
        vector float lower  = vec_nmsub(b, w_up, vec_madd(a, w_down, zero));
        vector float upper  = vec_madd(b, w_down, vec_madd(a, w_up, zero));

        vec_st(lower, up, dst_mid);
        /* The upper half is written in descending order, so flip back. */
        vec_st(vec_perm(upper, upper, flip), down, dst_mid);
    }
}
00109
00110 void ff_float_init_altivec(DSPContext* c, AVCodecContext *avctx)
00111 {
00112 c->vector_fmul_reverse = vector_fmul_reverse_altivec;
00113 c->vector_fmul_add = vector_fmul_add_altivec;
00114 if(!(avctx->flags & CODEC_FLAG_BITEXACT)) {
00115 c->vector_fmul_window = vector_fmul_window_altivec;
00116 }
00117 }